Skip to content

Commit 195fcd2

Browse files
committed
Implement extra resources for test actions
Add support for user-specified resource types in the resource manager. This generalizes the CPU, RAM and "test count" resource support for other resource types such as the number of GPUs, available GPU memory, the number of embedded devices connected to the host, etc. The available amount of extra resources can be specified using the new --local_extra_resources=<resourcename>=<amount> command line flag, which is analoguous to the existing --local_cpu_resources and --local_memory_resources flags. Tests can then declare the amount of extra resources they need by using a "resources:<resourcename>:<amount>" tag.
1 parent 9a765c8 commit 195fcd2

File tree

10 files changed

+272
-30
lines changed

10 files changed

+272
-30
lines changed

src/main/java/com/google/devtools/build/lib/actions/BUILD

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,7 @@ java_library(
124124
"//third_party:flogger",
125125
"//third_party:guava",
126126
"//third_party:jsr305",
127+
"//third_party:rxjava3",
127128
"//third_party/protobuf:protobuf_java",
128129
],
129130
)
@@ -305,6 +306,7 @@ java_library(
305306
"//src/main/java/com/google/devtools/common/options",
306307
"//third_party:flogger",
307308
"//third_party:guava",
309+
"//third_party:rxjava3",
308310
],
309311
)
310312

src/main/java/com/google/devtools/build/lib/actions/ExecutionRequirements.java

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,35 @@ public String parseIfMatches(String tag) throws ValidationException {
152152
return null;
153153
});
154154

155+
/** How many extra resources an action requires for execution. */
156+
public static final ParseableRequirement RESOURCES =
157+
ParseableRequirement.create(
158+
"resources:<str>:<int>",
159+
Pattern.compile("resources:(.+:.+)"),
160+
s -> {
161+
Preconditions.checkNotNull(s);
162+
163+
int splitIndex = s.indexOf(":");
164+
String resourceCount = s.substring(splitIndex+1);
165+
int value;
166+
try {
167+
value = Integer.parseInt(resourceCount);
168+
} catch (NumberFormatException e) {
169+
return "can't be parsed as an integer";
170+
}
171+
172+
// De-and-reserialize & compare to only allow canonical integer formats.
173+
if (!Integer.toString(value).equals(resourceCount)) {
174+
return "must be in canonical format (e.g. '4' instead of '+04')";
175+
}
176+
177+
if (value < 1) {
178+
return "can't be zero or negative";
179+
}
180+
181+
return null;
182+
});
183+
155184
/** If an action supports running in persistent worker mode. */
156185
public static final String SUPPORTS_WORKERS = "supports-workers";
157186

src/main/java/com/google/devtools/build/lib/actions/ResourceManager.java

Lines changed: 64 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,9 +25,13 @@
2525
import com.google.devtools.build.lib.util.OS;
2626
import com.google.devtools.build.lib.util.Pair;
2727
import java.io.IOException;
28+
import java.util.HashMap;
29+
import java.util.HashSet;
2830
import java.util.Iterator;
2931
import java.util.LinkedList;
3032
import java.util.List;
33+
import java.util.Map;
34+
import java.util.Set;
3135
import java.util.concurrent.CountDownLatch;
3236

3337
/**
@@ -137,6 +141,10 @@ public static ResourceManager instance() {
137141
// definition in the ResourceSet class.
138142
private double usedRam;
139143

144+
// Used amount of extra resources. Corresponds to the extra resource
145+
// definition in the ResourceSet class.
146+
private Map<String, Float> usedExtraResources;
147+
140148
// Used local test count. Corresponds to the local test count definition in the ResourceSet class.
141149
private int usedLocalTestCount;
142150

@@ -145,6 +153,7 @@ public static ResourceManager instance() {
145153

146154
private ResourceManager() {
147155
requestList = new LinkedList<>();
156+
usedExtraResources = new HashMap<>();
148157
}
149158

150159
@VisibleForTesting public static ResourceManager instanceForTestingOnly() {
@@ -158,6 +167,7 @@ private ResourceManager() {
158167
public synchronized void resetResourceUsage() {
159168
usedCpu = 0;
160169
usedRam = 0;
170+
usedExtraResources = new HashMap<>();
161171
usedLocalTestCount = 0;
162172
for (Pair<ResourceSet, CountDownLatch> request : requestList) {
163173
// CountDownLatch can be set only to 0 or 1.
@@ -177,6 +187,7 @@ public synchronized void setAvailableResources(ResourceSet resources) {
177187
ResourceSet.create(
178188
staticResources.getMemoryMb(),
179189
staticResources.getCpuUsage(),
190+
staticResources.getExtraResourceUsage(),
180191
staticResources.getLocalTestCount());
181192
processWaitingThreads();
182193
}
@@ -265,14 +276,25 @@ ResourceHandle tryAcquire(ActionExecutionMetadata owner, ResourceSet resources)
265276
private void incrementResources(ResourceSet resources) {
266277
usedCpu += resources.getCpuUsage();
267278
usedRam += resources.getMemoryMb();
279+
280+
resources.getExtraResourceUsage().entrySet().forEach(
281+
resource -> {
282+
String key = (String)resource.getKey();
283+
float value = resource.getValue();
284+
if (usedExtraResources.containsKey(key)) {
285+
value += (float)usedExtraResources.get(key);
286+
}
287+
usedExtraResources.put(key, value);
288+
});
289+
268290
usedLocalTestCount += resources.getLocalTestCount();
269291
}
270292

271293
/**
272294
* Return true if any resources have been claimed through this manager.
273295
*/
274296
public synchronized boolean inUse() {
275-
return usedCpu != 0.0 || usedRam != 0.0 || usedLocalTestCount != 0 || !requestList.isEmpty();
297+
return usedCpu != 0.0 || usedRam != 0.0 || !usedExtraResources.isEmpty() || usedLocalTestCount != 0 || !requestList.isEmpty();
276298
}
277299

278300

@@ -323,6 +345,13 @@ private synchronized CountDownLatch acquire(ResourceSet resources) {
323345
private synchronized boolean release(ResourceSet resources) {
324346
usedCpu -= resources.getCpuUsage();
325347
usedRam -= resources.getMemoryMb();
348+
349+
for (Map.Entry<String, Float> resource : resources.getExtraResourceUsage().entrySet()) {
350+
String key = (String)resource.getKey();
351+
float value = (float)usedExtraResources.get(key) - resource.getValue();
352+
usedExtraResources.put(key, value);
353+
}
354+
326355
usedLocalTestCount -= resources.getLocalTestCount();
327356

328357
// TODO(bazel-team): (2010) rounding error can accumulate and value below can end up being
@@ -334,6 +363,20 @@ private synchronized boolean release(ResourceSet resources) {
334363
if (usedRam < epsilon) {
335364
usedRam = 0;
336365
}
366+
367+
Set<String> toRemove = new HashSet<>();
368+
usedExtraResources.entrySet().forEach(
369+
resource -> {
370+
String key = (String)resource.getKey();
371+
float value = (float)usedExtraResources.get(key);
372+
if (value < epsilon) {
373+
toRemove.add(key);
374+
}
375+
});
376+
for (String key : toRemove) {
377+
usedExtraResources.remove(key);
378+
}
379+
337380
if (!requestList.isEmpty()) {
338381
processWaitingThreads();
339382
return true;
@@ -361,12 +404,29 @@ private synchronized void processWaitingThreads() {
361404
}
362405
}
363406

407+
/**
408+
* Return true iff all requested extra resources are considered to be available.
409+
*/
410+
private boolean areExtraResourcesAvailable(ResourceSet resources) {
411+
for (Map.Entry<String, Float> resource : resources.getExtraResourceUsage().entrySet()) {
412+
String key = (String)resource.getKey();
413+
float used = (float)usedExtraResources.getOrDefault(key, 0f);
414+
float requested = resource.getValue();
415+
float available = (float)availableResources.getExtraResourceUsage().getOrDefault(key, 0f);
416+
float epsilon = 0.0001f; // Account for possible rounding errors.
417+
if (requested != 0.0 && used != 0.0 && requested + used > available + epsilon) {
418+
return false;
419+
}
420+
}
421+
return true;
422+
}
423+
364424
// Method will return true if all requested resources are considered to be available.
365425
private boolean areResourcesAvailable(ResourceSet resources) {
366426
Preconditions.checkNotNull(availableResources);
367427
// Comparison below is robust, since any calculation errors will be fixed
368428
// by the release() method.
369-
if (usedCpu == 0.0 && usedRam == 0.0 && usedLocalTestCount == 0) {
429+
if (usedCpu == 0.0 && usedRam == 0.0 && usedExtraResources.isEmpty() && usedLocalTestCount == 0) {
370430
return true;
371431
}
372432
// Use only MIN_NECESSARY_???_RATIO of the resource value to check for
@@ -410,9 +470,10 @@ private boolean areResourcesAvailable(ResourceSet resources) {
410470
// 3) If used resource amount is less than total available resource amount.
411471
boolean cpuIsAvailable = cpu == 0.0 || usedCpu == 0.0 || usedCpu + cpu <= availableCpu;
412472
boolean ramIsAvailable = ram == 0.0 || usedRam == 0.0 || ram <= remainingRam;
473+
boolean extraResourcesIsAvailable = areExtraResourcesAvailable(resources);
413474
boolean localTestCountIsAvailable = localTestCount == 0 || usedLocalTestCount == 0
414475
|| usedLocalTestCount + localTestCount <= availableLocalTestCount;
415-
return cpuIsAvailable && ramIsAvailable && localTestCountIsAvailable;
476+
return cpuIsAvailable && ramIsAvailable && extraResourcesIsAvailable && localTestCountIsAvailable;
416477
}
417478

418479
@VisibleForTesting

src/main/java/com/google/devtools/build/lib/actions/ResourceSet.java

Lines changed: 33 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,13 +14,16 @@
1414

1515
package com.google.devtools.build.lib.actions;
1616

17+
import com.google.common.base.Joiner;
1718
import com.google.common.base.Splitter;
19+
import com.google.common.collect.ImmutableMap;
1820
import com.google.common.primitives.Doubles;
1921
import com.google.devtools.build.lib.collect.nestedset.NestedSet;
2022
import com.google.devtools.build.lib.concurrent.ThreadSafety.Immutable;
2123
import com.google.devtools.build.lib.skyframe.serialization.autocodec.AutoCodec;
2224
import com.google.devtools.common.options.Converter;
2325
import com.google.devtools.common.options.OptionsParsingException;
26+
import io.reactivex.rxjava3.annotations.NonNull;
2427
import java.util.Iterator;
2528
import java.util.NoSuchElementException;
2629

@@ -43,12 +46,23 @@ public class ResourceSet implements ResourceSetOrBuilder {
4346
/** The number of CPUs, or fractions thereof. */
4447
private final double cpuUsage;
4548

49+
/**
50+
* Map of extra resources (for example: GPUs, embedded boards, ...) mapping
51+
* name of the resource to a value.
52+
*/
53+
private final ImmutableMap<String, Float> extraResourceUsage;
54+
4655
/** The number of local tests. */
4756
private final int localTestCount;
4857

4958
private ResourceSet(double memoryMb, double cpuUsage, int localTestCount) {
59+
this(memoryMb, cpuUsage, ImmutableMap.of(), localTestCount);
60+
}
61+
62+
private ResourceSet(double memoryMb, double cpuUsage, @NonNull ImmutableMap<String, Float> extraResourceUsage, int localTestCount) {
5063
this.memoryMb = memoryMb;
5164
this.cpuUsage = cpuUsage;
65+
this.extraResourceUsage = extraResourceUsage;
5266
this.localTestCount = localTestCount;
5367
}
5468

@@ -74,18 +88,28 @@ public static ResourceSet createWithLocalTestCount(int localTestCount) {
7488
}
7589

7690
/**
77-
* Returns a new ResourceSet with the provided values for memoryMb, cpuUsage, ioUsage, and
91+
* Returns a new ResourceSet with the provided values for memoryMb, cpuUsage, and
92+
* localTestCount. Most action resource definitions should use {@link #createWithRamCpu} or
93+
* {@link #createWithLocalTestCount(int)}. Use this method primarily when constructing
94+
* ResourceSets that represent available resources.
95+
*/
96+
public static ResourceSet create(double memoryMb, double cpuUsage, int localTestCount) {
97+
return ResourceSet.create(memoryMb, cpuUsage, ImmutableMap.of(), localTestCount);
98+
}
99+
100+
/**
101+
* Returns a new ResourceSet with the provided values for memoryMb, cpuUsage, extraResources, and
78102
* localTestCount. Most action resource definitions should use {@link #createWithRamCpu} or
79103
* {@link #createWithLocalTestCount(int)}. Use this method primarily when constructing
80104
* ResourceSets that represent available resources.
81105
*/
82106
@AutoCodec.Instantiator
83107
public static ResourceSet create(
84-
double memoryMb, double cpuUsage, int localTestCount) {
85-
if (memoryMb == 0 && cpuUsage == 0 && localTestCount == 0) {
108+
double memoryMb, double cpuUsage, ImmutableMap<String, Float> extraResourceUsage, int localTestCount) {
109+
if (memoryMb == 0 && cpuUsage == 0 && extraResourceUsage.size() == 0 && localTestCount == 0) {
86110
return ZERO;
87111
}
88-
return new ResourceSet(memoryMb, cpuUsage, localTestCount);
112+
return new ResourceSet(memoryMb, cpuUsage, extraResourceUsage, localTestCount);
89113
}
90114

91115
/** Returns the amount of real memory (resident set size) used in MB. */
@@ -105,6 +129,10 @@ public double getCpuUsage() {
105129
return cpuUsage;
106130
}
107131

132+
public ImmutableMap<String, Float> getExtraResourceUsage() {
133+
return extraResourceUsage;
134+
}
135+
108136
/** Returns the local test count used. */
109137
public int getLocalTestCount() {
110138
return localTestCount;
@@ -115,6 +143,7 @@ public String toString() {
115143
return "Resources: \n"
116144
+ "Memory: " + memoryMb + "M\n"
117145
+ "CPU: " + cpuUsage + "\n"
146+
+ Joiner.on("\n").withKeyValueSeparator(": ").join(extraResourceUsage.entrySet())
118147
+ "Local tests: " + localTestCount + "\n";
119148
}
120149

src/main/java/com/google/devtools/build/lib/analysis/test/TestTargetProperties.java

Lines changed: 52 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
import com.google.devtools.build.lib.server.FailureDetails.FailureDetail;
3232
import com.google.devtools.build.lib.server.FailureDetails.TestAction;
3333
import com.google.devtools.build.lib.server.FailureDetails.TestAction.Code;
34+
import java.util.HashMap;
3435
import java.util.List;
3536
import java.util.Map;
3637

@@ -162,32 +163,22 @@ public boolean isPersistentTestRunner() {
162163
return isPersistentTestRunner;
163164
}
164165

165-
public ResourceSet getLocalResourceUsage(Label label, boolean usingLocalTestJobs)
166-
throws UserExecException {
167-
if (usingLocalTestJobs) {
168-
return LOCAL_TEST_JOBS_BASED_RESOURCES;
169-
}
170-
166+
private double getLocalCpuResourceUsage(Label label) throws UserExecException {
171167
ResourceSet testResourcesFromSize = TestTargetProperties.getResourceSetFromSize(size);
172-
173168
// Tests can override their CPU reservation with a "cpu:<n>" tag.
174-
ResourceSet testResourcesFromTag = null;
169+
double cpuCount = -1.0;
175170
for (String tag : executionInfo.keySet()) {
176171
try {
177172
String cpus = ExecutionRequirements.CPU.parseIfMatches(tag);
178173
if (cpus != null) {
179-
if (testResourcesFromTag != null) {
174+
if (cpuCount != -1.0) {
180175
String message =
181176
String.format(
182177
"%s has more than one '%s' tag, but duplicate tags aren't allowed",
183178
label, ExecutionRequirements.CPU.userFriendlyName());
184179
throw new UserExecException(createFailureDetail(message, Code.DUPLICATE_CPU_TAGS));
185180
}
186-
testResourcesFromTag =
187-
ResourceSet.create(
188-
testResourcesFromSize.getMemoryMb(),
189-
Float.parseFloat(cpus),
190-
testResourcesFromSize.getLocalTestCount());
181+
cpuCount = Float.parseFloat(cpus);
191182
}
192183
} catch (ValidationException e) {
193184
String message =
@@ -200,8 +191,54 @@ public ResourceSet getLocalResourceUsage(Label label, boolean usingLocalTestJobs
200191
throw new UserExecException(createFailureDetail(message, Code.INVALID_CPU_TAG));
201192
}
202193
}
194+
return cpuCount != -1.0 ? cpuCount : testResourcesFromSize.getCpuUsage();
195+
}
203196

204-
return testResourcesFromTag != null ? testResourcesFromTag : testResourcesFromSize;
197+
private ImmutableMap<String, Float> getLocalExtraResourceUsage(Label label) throws UserExecException {
198+
// Tests can specify requirements for extra resources using "resources:<resourcename>:<amount>" tag.
199+
Map<String, Float> extraResources = new HashMap<>();
200+
for (String tag : executionInfo.keySet()) {
201+
try {
202+
String extras = ExecutionRequirements.RESOURCES.parseIfMatches(tag);
203+
if (extras != null) {
204+
int splitIndex = extras.indexOf(":");
205+
String resourceName = extras.substring(0, splitIndex);
206+
String resourceCount = extras.substring(splitIndex+1);
207+
if (extraResources.get(resourceName) != null) {
208+
String message =
209+
String.format(
210+
"%s has more than one '%s' tag, but duplicate tags aren't allowed",
211+
label, ExecutionRequirements.RESOURCES.userFriendlyName());
212+
throw new UserExecException(createFailureDetail(message, Code.DUPLICATE_CPU_TAGS));
213+
}
214+
extraResources.put(resourceName, Float.parseFloat(resourceCount));
215+
}
216+
} catch (ValidationException e) {
217+
String message =
218+
String.format(
219+
"%s has a '%s' tag, but its value '%s' didn't pass validation: %s",
220+
label,
221+
ExecutionRequirements.RESOURCES.userFriendlyName(),
222+
e.getTagValue(),
223+
e.getMessage());
224+
throw new UserExecException(createFailureDetail(message, Code.INVALID_CPU_TAG));
225+
}
226+
}
227+
return ImmutableMap.copyOf(extraResources);
228+
}
229+
230+
public ResourceSet getLocalResourceUsage(Label label, boolean usingLocalTestJobs)
231+
throws UserExecException {
232+
if (usingLocalTestJobs) {
233+
return LOCAL_TEST_JOBS_BASED_RESOURCES;
234+
}
235+
236+
ResourceSet testResourcesFromSize = TestTargetProperties.getResourceSetFromSize(size);
237+
return ResourceSet.create(
238+
testResourcesFromSize.getMemoryMb(),
239+
getLocalCpuResourceUsage(label),
240+
getLocalExtraResourceUsage(label),
241+
testResourcesFromSize.getLocalTestCount());
205242
}
206243

207244
private static FailureDetail createFailureDetail(String message, Code detailedCode) {

0 commit comments

Comments
 (0)