Skip to content

Commit 1b13d2f

Browse files
sceleDrew Macrae
authored and
Drew Macrae
committed
Implement extra resources for test actions
Add support for user-specified resource types in the resource manager. This generalizes the CPU, RAM and "test count" resource support for other resource types such as the number of GPUs, available GPU memory, the number of embedded devices connected to the host, etc. The available amount of extra resources can be specified using the new --local_extra_resources=<resourcename>=<amount> command line flag, which is analoguous to the existing --local_cpu_resources and --local_memory_resources flags. Tests can then declare the amount of extra resources they need by using a "resources:<resourcename>:<amount>" tag.
1 parent 09da33b commit 1b13d2f

File tree

10 files changed

+260
-24
lines changed

10 files changed

+260
-24
lines changed

src/main/java/com/google/devtools/build/lib/actions/BUILD

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,7 @@ java_library(
134134
"//third_party:flogger",
135135
"//third_party:guava",
136136
"//third_party:jsr305",
137+
"//third_party:rxjava3",
137138
"//third_party/protobuf:protobuf_java",
138139
],
139140
)
@@ -318,6 +319,7 @@ java_library(
318319
"//third_party:flogger",
319320
"//third_party:guava",
320321
"//third_party:jsr305",
322+
"//third_party:rxjava3",
321323
],
322324
)
323325

src/main/java/com/google/devtools/build/lib/actions/ExecutionRequirements.java

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,35 @@ public String parseIfMatches(String tag) throws ValidationException {
154154
return null;
155155
});
156156

157+
/** How many extra resources an action requires for execution. */
158+
public static final ParseableRequirement RESOURCES =
159+
ParseableRequirement.create(
160+
"resources:<str>:<int>",
161+
Pattern.compile("resources:(.+:.+)"),
162+
s -> {
163+
Preconditions.checkNotNull(s);
164+
165+
int splitIndex = s.indexOf(":");
166+
String resourceCount = s.substring(splitIndex+1);
167+
int value;
168+
try {
169+
value = Integer.parseInt(resourceCount);
170+
} catch (NumberFormatException e) {
171+
return "can't be parsed as an integer";
172+
}
173+
174+
// De-and-reserialize & compare to only allow canonical integer formats.
175+
if (!Integer.toString(value).equals(resourceCount)) {
176+
return "must be in canonical format (e.g. '4' instead of '+04')";
177+
}
178+
179+
if (value < 1) {
180+
return "can't be zero or negative";
181+
}
182+
183+
return null;
184+
});
185+
157186
/** If an action supports running in persistent worker mode. */
158187
public static final String SUPPORTS_WORKERS = "supports-workers";
159188

src/main/java/com/google/devtools/build/lib/actions/ResourceManager.java

Lines changed: 64 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,12 @@
2727
import com.google.devtools.build.lib.worker.WorkerPool;
2828
import java.io.IOException;
2929
import java.util.Deque;
30+
import java.util.HashMap;
31+
import java.util.HashSet;
3032
import java.util.Iterator;
3133
import java.util.LinkedList;
34+
import java.util.Map;
35+
import java.util.Set;
3236
import java.util.concurrent.CountDownLatch;
3337
import javax.annotation.Nullable;
3438

@@ -183,13 +187,19 @@ public double getUsedCPU() {
183187
// definition in the ResourceSet class.
184188
private double usedRam;
185189

190+
// Used amount of extra resources. Corresponds to the extra resource
191+
// definition in the ResourceSet class.
192+
private Map<String, Float> usedExtraResources;
193+
186194
// Used local test count. Corresponds to the local test count definition in the ResourceSet class.
187195
private int usedLocalTestCount;
188196

189197
/** If set, local-only actions are given priority over dynamically run actions. */
190198
private boolean prioritizeLocalActions;
191199

192-
private ResourceManager() {}
200+
private ResourceManager() {
201+
usedExtraResources = new HashMap<>();
202+
}
193203

194204
@VisibleForTesting
195205
public static ResourceManager instanceForTestingOnly() {
@@ -204,6 +214,7 @@ public static ResourceManager instanceForTestingOnly() {
204214
public synchronized void resetResourceUsage() {
205215
usedCpu = 0;
206216
usedRam = 0;
217+
usedExtraResources = new HashMap<>();
207218
usedLocalTestCount = 0;
208219
for (Pair<ResourceSet, LatchWithWorker> request : localRequests) {
209220
request.second.latch.countDown();
@@ -298,6 +309,17 @@ private Worker incrementResources(ResourceSet resources)
298309
throws IOException, InterruptedException {
299310
usedCpu += resources.getCpuUsage();
300311
usedRam += resources.getMemoryMb();
312+
313+
resources.getExtraResourceUsage().entrySet().forEach(
314+
resource -> {
315+
String key = (String)resource.getKey();
316+
float value = resource.getValue();
317+
if (usedExtraResources.containsKey(key)) {
318+
value += (float)usedExtraResources.get(key);
319+
}
320+
usedExtraResources.put(key, value);
321+
});
322+
301323
usedLocalTestCount += resources.getLocalTestCount();
302324

303325
if (resources.getWorkerKey() != null) {
@@ -310,6 +332,7 @@ private Worker incrementResources(ResourceSet resources)
310332
public synchronized boolean inUse() {
311333
return usedCpu != 0.0
312334
|| usedRam != 0.0
335+
|| !usedExtraResources.isEmpty()
313336
|| usedLocalTestCount != 0
314337
|| !localRequests.isEmpty()
315338
|| !dynamicWorkerRequests.isEmpty()
@@ -417,6 +440,13 @@ private boolean release(ResourceSet resources, @Nullable Worker worker)
417440
private synchronized void releaseResourcesOnly(ResourceSet resources) {
418441
usedCpu -= resources.getCpuUsage();
419442
usedRam -= resources.getMemoryMb();
443+
444+
for (Map.Entry<String, Float> resource : resources.getExtraResourceUsage().entrySet()) {
445+
String key = (String)resource.getKey();
446+
float value = (float)usedExtraResources.get(key) - resource.getValue();
447+
usedExtraResources.put(key, value);
448+
}
449+
420450
usedLocalTestCount -= resources.getLocalTestCount();
421451

422452
// TODO(bazel-team): (2010) rounding error can accumulate and value below can end up being
@@ -428,6 +458,19 @@ private synchronized void releaseResourcesOnly(ResourceSet resources) {
428458
if (usedRam < epsilon) {
429459
usedRam = 0;
430460
}
461+
462+
Set<String> toRemove = new HashSet<>();
463+
usedExtraResources.entrySet().forEach(
464+
resource -> {
465+
String key = (String)resource.getKey();
466+
float value = (float)usedExtraResources.get(key);
467+
if (value < epsilon) {
468+
toRemove.add(key);
469+
}
470+
});
471+
for (String key : toRemove) {
472+
usedExtraResources.remove(key);
473+
}
431474
}
432475

433476
private synchronized boolean processAllWaitingThreads() throws IOException, InterruptedException {
@@ -466,6 +509,23 @@ private synchronized void processWaitingThreads(Deque<Pair<ResourceSet, LatchWit
466509
}
467510
}
468511

512+
/**
513+
* Return true iff all requested extra resources are considered to be available.
514+
*/
515+
private boolean areExtraResourcesAvailable(ResourceSet resources) {
516+
for (Map.Entry<String, Float> resource : resources.getExtraResourceUsage().entrySet()) {
517+
String key = (String)resource.getKey();
518+
float used = (float)usedExtraResources.getOrDefault(key, 0f);
519+
float requested = resource.getValue();
520+
float available = (float)availableResources.getExtraResourceUsage().getOrDefault(key, 0f);
521+
float epsilon = 0.0001f; // Account for possible rounding errors.
522+
if (requested != 0.0 && used != 0.0 && requested + used > available + epsilon) {
523+
return false;
524+
}
525+
}
526+
return true;
527+
}
528+
469529
// Method will return true if all requested resources are considered to be available.
470530
@VisibleForTesting
471531
boolean areResourcesAvailable(ResourceSet resources) {
@@ -484,7 +544,7 @@ boolean areResourcesAvailable(ResourceSet resources) {
484544
workerKey == null
485545
|| (activeWorkers < availableWorkers && workerPool.couldBeBorrowed(workerKey));
486546

487-
if (usedCpu == 0.0 && usedRam == 0.0 && usedLocalTestCount == 0 && workerIsAvailable) {
547+
if (usedCpu == 0.0 && usedRam == 0.0 && usedExtraResources.isEmpty() && usedLocalTestCount == 0 && workerIsAvailable) {
488548
return true;
489549
}
490550
// Use only MIN_NECESSARY_???_RATIO of the resource value to check for
@@ -515,7 +575,8 @@ boolean areResourcesAvailable(ResourceSet resources) {
515575
localTestCount == 0
516576
|| usedLocalTestCount == 0
517577
|| usedLocalTestCount + localTestCount <= availableLocalTestCount;
518-
return cpuIsAvailable && ramIsAvailable && localTestCountIsAvailable && workerIsAvailable;
578+
boolean extraResourcesIsAvailable = areExtraResourcesAvailable(resources);
579+
return cpuIsAvailable && ramIsAvailable && extraResourcesIsAvailable && localTestCountIsAvailable && workerIsAvailable;
519580
}
520581

521582
@VisibleForTesting

src/main/java/com/google/devtools/build/lib/actions/ResourceSet.java

Lines changed: 35 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -14,13 +14,16 @@
1414

1515
package com.google.devtools.build.lib.actions;
1616

17+
import com.google.common.base.Joiner;
1718
import com.google.common.base.Splitter;
19+
import com.google.common.collect.ImmutableMap;
1820
import com.google.common.primitives.Doubles;
1921
import com.google.devtools.build.lib.concurrent.ThreadSafety.Immutable;
2022
import com.google.devtools.build.lib.util.OS;
2123
import com.google.devtools.build.lib.worker.WorkerKey;
2224
import com.google.devtools.common.options.Converter;
2325
import com.google.devtools.common.options.OptionsParsingException;
26+
import io.reactivex.rxjava3.annotations.NonNull;
2427
import java.util.Iterator;
2528
import java.util.NoSuchElementException;
2629
import javax.annotation.Nullable;
@@ -43,16 +46,26 @@ public class ResourceSet implements ResourceSetOrBuilder {
4346
/** The number of CPUs, or fractions thereof. */
4447
private final double cpuUsage;
4548

49+
/**
50+
* Map of extra resources (for example: GPUs, embedded boards, ...) mapping
51+
* name of the resource to a value.
52+
*/
53+
private final ImmutableMap<String, Float> extraResourceUsage;
54+
4655
/** The number of local tests. */
4756
private final int localTestCount;
4857

4958
/** The workerKey of used worker. Null if no worker is used. */
5059
@Nullable private final WorkerKey workerKey;
5160

52-
private ResourceSet(
53-
double memoryMb, double cpuUsage, int localTestCount, @Nullable WorkerKey workerKey) {
61+
private ResourceSet(double memoryMb, double cpuUsage, int localTestCount, @Nullable WorkerKey workerKey) {
62+
this(memoryMb, cpuUsage, ImmutableMap.of(), localTestCount, workerKey);
63+
}
64+
65+
private ResourceSet(double memoryMb, double cpuUsage, @NonNull ImmutableMap<String, Float> extraResourceUsage, int localTestCount, @Nullable WorkerKey workerKey) {
5466
this.memoryMb = memoryMb;
5567
this.cpuUsage = cpuUsage;
68+
this.extraResourceUsage = extraResourceUsage;
5669
this.localTestCount = localTestCount;
5770
this.workerKey = workerKey;
5871
}
@@ -83,21 +96,31 @@ public static ResourceSet createWithLocalTestCount(int localTestCount) {
8396
}
8497

8598
/**
86-
* Returns a new ResourceSet with the provided values for memoryMb, cpuUsage, ioUsage, and
99+
* Returns a new ResourceSet with the provided values for memoryMb, cpuUsage, and
87100
* localTestCount. Most action resource definitions should use {@link #createWithRamCpu} or {@link
88101
* #createWithLocalTestCount(int)}. Use this method primarily when constructing ResourceSets that
89102
* represent available resources.
90103
*/
91104
public static ResourceSet create(double memoryMb, double cpuUsage, int localTestCount) {
92-
return createWithWorkerKey(memoryMb, cpuUsage, localTestCount, /* workerKey= */ null);
105+
return ResourceSet.create(memoryMb, cpuUsage, ImmutableMap.of(), localTestCount, /* wolkerKey= */ null);
106+
}
107+
108+
/**
109+
* Returns a new ResourceSet with the provided values for memoryMb, cpuUsage, extraResources, and
110+
* localTestCount. Most action resource definitions should use {@link #createWithRamCpu} or
111+
* {@link #createWithLocalTestCount(int)}. Use this method primarily when constructing
112+
* ResourceSets that represent available resources.
113+
*/
114+
public static ResourceSet create(double memoryMb, double cpuUsage, ImmutableMap<String, Float> extraResourceUsage, int localTestCount) {
115+
return createWithWorkerKey(memoryMb, cpuUsage, extraResourceUseage, localTestCount, /* workerKey= */ null);
93116
}
94117

95118
public static ResourceSet createWithWorkerKey(
96-
double memoryMb, double cpuUsage, int localTestCount, WorkerKey workerKey) {
97-
if (memoryMb == 0 && cpuUsage == 0 && localTestCount == 0 && workerKey == null) {
119+
double memoryMb, double cpuUsage, ImmutableMap<String, Float> extraResourceUsage, int localTestCount, WorkerKey workerKey) {
120+
if (memoryMb == 0 && cpuUsage == 0 && extraResourceUsage.size() == 0 && localTestCount == 0 && workerKey == null) {
98121
return ZERO;
99122
}
100-
return new ResourceSet(memoryMb, cpuUsage, localTestCount, workerKey);
123+
return new ResourceSet(memoryMb, cpuUsage, extraResourceUsage, localTestCount, workerKey);
101124
}
102125

103126
/** Returns the amount of real memory (resident set size) used in MB. */
@@ -124,6 +147,10 @@ public double getCpuUsage() {
124147
return cpuUsage;
125148
}
126149

150+
public ImmutableMap<String, Float> getExtraResourceUsage() {
151+
return extraResourceUsage;
152+
}
153+
127154
/** Returns the local test count used. */
128155
public int getLocalTestCount() {
129156
return localTestCount;
@@ -138,6 +165,7 @@ public String toString() {
138165
+ "CPU: "
139166
+ cpuUsage
140167
+ "\n"
168+
+ Joiner.on("\n").withKeyValueSeparator(": ").join(extraResourceUsage.entrySet())
141169
+ "Local tests: "
142170
+ localTestCount
143171
+ "\n";

src/main/java/com/google/devtools/build/lib/analysis/test/TestTargetProperties.java

Lines changed: 47 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
import com.google.devtools.build.lib.server.FailureDetails.FailureDetail;
3232
import com.google.devtools.build.lib.server.FailureDetails.TestAction;
3333
import com.google.devtools.build.lib.server.FailureDetails.TestAction.Code;
34+
import java.util.HashMap;
3435
import java.util.List;
3536
import java.util.Map;
3637

@@ -160,25 +161,29 @@ public ResourceSet getLocalResourceUsage(Label label, boolean usingLocalTestJobs
160161
}
161162

162163
ResourceSet testResourcesFromSize = TestTargetProperties.getResourceSetFromSize(size);
164+
return ResourceSet.create(
165+
testResourcesFromSize.getMemoryMb(),
166+
getLocalCpuResourceUsage(label),
167+
getLocalExtraResourceUsage(label),
168+
testResourcesFromSize.getLocalTestCount());
169+
}
163170

171+
private double getLocalCpuResourceUsage(Label label) throws UserExecException {
172+
ResourceSet testResourcesFromSize = TestTargetProperties.getResourceSetFromSize(size);
164173
// Tests can override their CPU reservation with a "cpu:<n>" tag.
165-
ResourceSet testResourcesFromTag = null;
174+
double cpuCount = -1.0;
166175
for (String tag : executionInfo.keySet()) {
167176
try {
168177
String cpus = ExecutionRequirements.CPU.parseIfMatches(tag);
169178
if (cpus != null) {
170-
if (testResourcesFromTag != null) {
179+
if (cpuCount != -1.0) {
171180
String message =
172181
String.format(
173182
"%s has more than one '%s' tag, but duplicate tags aren't allowed",
174183
label, ExecutionRequirements.CPU.userFriendlyName());
175184
throw new UserExecException(createFailureDetail(message, Code.DUPLICATE_CPU_TAGS));
176185
}
177-
testResourcesFromTag =
178-
ResourceSet.create(
179-
testResourcesFromSize.getMemoryMb(),
180-
Float.parseFloat(cpus),
181-
testResourcesFromSize.getLocalTestCount());
186+
cpuCount = Float.parseFloat(cpus);
182187
}
183188
} catch (ValidationException e) {
184189
String message =
@@ -191,10 +196,44 @@ public ResourceSet getLocalResourceUsage(Label label, boolean usingLocalTestJobs
191196
throw new UserExecException(createFailureDetail(message, Code.INVALID_CPU_TAG));
192197
}
193198
}
199+
return cpuCount != -1.0 ? cpuCount : testResourcesFromSize.getCpuUsage();
200+
}
194201

195-
return testResourcesFromTag != null ? testResourcesFromTag : testResourcesFromSize;
202+
private ImmutableMap<String, Float> getLocalExtraResourceUsage(Label label) throws UserExecException {
203+
// Tests can specify requirements for extra resources using "resources:<resourcename>:<amount>" tag.
204+
Map<String, Float> extraResources = new HashMap<>();
205+
for (String tag : executionInfo.keySet()) {
206+
try {
207+
String extras = ExecutionRequirements.RESOURCES.parseIfMatches(tag);
208+
if (extras != null) {
209+
int splitIndex = extras.indexOf(":");
210+
String resourceName = extras.substring(0, splitIndex);
211+
String resourceCount = extras.substring(splitIndex+1);
212+
if (extraResources.get(resourceName) != null) {
213+
String message =
214+
String.format(
215+
"%s has more than one '%s' tag, but duplicate tags aren't allowed",
216+
label, ExecutionRequirements.RESOURCES.userFriendlyName());
217+
throw new UserExecException(createFailureDetail(message, Code.DUPLICATE_CPU_TAGS));
218+
}
219+
extraResources.put(resourceName, Float.parseFloat(resourceCount));
220+
}
221+
} catch (ValidationException e) {
222+
String message =
223+
String.format(
224+
"%s has a '%s' tag, but its value '%s' didn't pass validation: %s",
225+
label,
226+
ExecutionRequirements.RESOURCES.userFriendlyName(),
227+
e.getTagValue(),
228+
e.getMessage());
229+
throw new UserExecException(createFailureDetail(message, Code.INVALID_CPU_TAG));
230+
}
231+
}
232+
return ImmutableMap.copyOf(extraResources);
196233
}
197234

235+
236+
198237
private static FailureDetail createFailureDetail(String message, Code detailedCode) {
199238
return FailureDetail.newBuilder()
200239
.setMessage(message)

0 commit comments

Comments
 (0)