Skip to content

Commit bcfc7f7

Browse files
Drew MacraeShreeM01
Drew Macrae
andauthored
Extra resources (#17229)
This recreates a [closed PR](#13996) to implement extra resources, which we're hoping to use in lowRISC/opentitan#16436. Fixes: #16817. Closes #16785. PiperOrigin-RevId: 498557024 Change-Id: I60d8f8f4a4a02748147cabb4cd60a2a9b95a2c68 Co-authored-by: kshyanashree <[email protected]>
1 parent 0849b3b commit bcfc7f7

File tree

10 files changed

+338
-24
lines changed

10 files changed

+338
-24
lines changed

src/main/java/com/google/devtools/build/lib/actions/ExecutionRequirements.java

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,30 @@ public String parseIfMatches(String tag) throws ValidationException {
154154
return null;
155155
});
156156

157+
/** How many extra resources an action requires for execution. */
158+
public static final ParseableRequirement RESOURCES =
159+
ParseableRequirement.create(
160+
"resources:<str>:<float>",
161+
Pattern.compile("resources:(.+:.+)"),
162+
s -> {
163+
Preconditions.checkNotNull(s);
164+
165+
int splitIndex = s.indexOf(":");
166+
String resourceCount = s.substring(splitIndex + 1);
167+
float value;
168+
try {
169+
value = Float.parseFloat(resourceCount);
170+
} catch (NumberFormatException e) {
171+
return "can't be parsed as a float";
172+
}
173+
174+
if (value < 0) {
175+
return "can't be negative";
176+
}
177+
178+
return null;
179+
});
180+
157181
/** If an action supports running in persistent worker mode. */
158182
public static final String SUPPORTS_WORKERS = "supports-workers";
159183

src/main/java/com/google/devtools/build/lib/actions/ResourceManager.java

Lines changed: 82 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,13 @@
2727
import com.google.devtools.build.lib.worker.WorkerPool;
2828
import java.io.IOException;
2929
import java.util.Deque;
30+
import java.util.HashMap;
31+
import java.util.HashSet;
3032
import java.util.Iterator;
3133
import java.util.LinkedList;
34+
import java.util.Map;
35+
import java.util.NoSuchElementException;
36+
import java.util.Set;
3237
import java.util.concurrent.CountDownLatch;
3338
import javax.annotation.Nullable;
3439

@@ -171,14 +176,16 @@ public static ResourceManager instance() {
171176
// definition in the ResourceSet class.
172177
private double usedRam;
173178

179+
// Used amount of extra resources. Corresponds to the extra resource
180+
// definition in the ResourceSet class.
181+
private Map<String, Float> usedExtraResources;
182+
174183
// Used local test count. Corresponds to the local test count definition in the ResourceSet class.
175184
private int usedLocalTestCount;
176185

177186
/** If set, local-only actions are given priority over dynamically run actions. */
178187
private boolean prioritizeLocalActions;
179188

180-
private ResourceManager() {}
181-
182189
@VisibleForTesting
183190
public static ResourceManager instanceForTestingOnly() {
184191
return new ResourceManager();
@@ -192,6 +199,7 @@ public static ResourceManager instanceForTestingOnly() {
192199
public synchronized void resetResourceUsage() {
193200
usedCpu = 0;
194201
usedRam = 0;
202+
usedExtraResources = new HashMap<>();
195203
usedLocalTestCount = 0;
196204
for (Pair<ResourceSet, LatchWithWorker> request : localRequests) {
197205
request.second.latch.countDown();
@@ -286,6 +294,20 @@ private Worker incrementResources(ResourceSet resources)
286294
throws IOException, InterruptedException {
287295
usedCpu += resources.getCpuUsage();
288296
usedRam += resources.getMemoryMb();
297+
298+
resources
299+
.getExtraResourceUsage()
300+
.entrySet()
301+
.forEach(
302+
resource -> {
303+
String key = (String) resource.getKey();
304+
float value = resource.getValue();
305+
if (usedExtraResources.containsKey(key)) {
306+
value += (float) usedExtraResources.get(key);
307+
}
308+
usedExtraResources.put(key, value);
309+
});
310+
289311
usedLocalTestCount += resources.getLocalTestCount();
290312

291313
if (resources.getWorkerKey() != null) {
@@ -298,6 +320,7 @@ private Worker incrementResources(ResourceSet resources)
298320
public synchronized boolean inUse() {
299321
return usedCpu != 0.0
300322
|| usedRam != 0.0
323+
|| !usedExtraResources.isEmpty()
301324
|| usedLocalTestCount != 0
302325
|| !localRequests.isEmpty()
303326
|| !dynamicWorkerRequests.isEmpty()
@@ -357,7 +380,7 @@ public void acquireResourceOwnership() {
357380
* wait.
358381
*/
359382
private synchronized LatchWithWorker acquire(ResourceSet resources, ResourcePriority priority)
360-
throws IOException, InterruptedException {
383+
throws IOException, InterruptedException, NoSuchElementException {
361384
if (areResourcesAvailable(resources)) {
362385
Worker worker = incrementResources(resources);
363386
return new LatchWithWorker(/* latch= */ null, worker);
@@ -405,6 +428,7 @@ private boolean release(ResourceSet resources, @Nullable Worker worker)
405428
private synchronized void releaseResourcesOnly(ResourceSet resources) {
406429
usedCpu -= resources.getCpuUsage();
407430
usedRam -= resources.getMemoryMb();
431+
408432
usedLocalTestCount -= resources.getLocalTestCount();
409433

410434
// TODO(bazel-team): (2010) rounding error can accumulate and value below can end up being
@@ -416,6 +440,19 @@ private synchronized void releaseResourcesOnly(ResourceSet resources) {
416440
if (usedRam < epsilon) {
417441
usedRam = 0;
418442
}
443+
444+
Set<String> toRemove = new HashSet<>();
445+
for (Map.Entry<String, Float> resource : resources.getExtraResourceUsage().entrySet()) {
446+
String key = (String) resource.getKey();
447+
float value = (float) usedExtraResources.get(key) - resource.getValue();
448+
usedExtraResources.put(key, value);
449+
if (value < epsilon) {
450+
toRemove.add(key);
451+
}
452+
}
453+
for (String key : toRemove) {
454+
usedExtraResources.remove(key);
455+
}
419456
}
420457

421458
private synchronized boolean processAllWaitingThreads() throws IOException, InterruptedException {
@@ -454,9 +491,35 @@ private synchronized void processWaitingThreads(Deque<Pair<ResourceSet, LatchWit
454491
}
455492
}
456493

494+
/** Throws an exception if requested extra resource isn't being tracked */
495+
private void assertExtraResourcesTracked(ResourceSet resources) throws NoSuchElementException {
496+
for (Map.Entry<String, Float> resource : resources.getExtraResourceUsage().entrySet()) {
497+
String key = (String) resource.getKey();
498+
if (!availableResources.getExtraResourceUsage().containsKey(key)) {
499+
throw new NoSuchElementException(
500+
"Resource " + key + " is not tracked in this resource set.");
501+
}
502+
}
503+
}
504+
505+
/** Return true iff all requested extra resources are considered to be available. */
506+
private boolean areExtraResourcesAvailable(ResourceSet resources) throws NoSuchElementException {
507+
for (Map.Entry<String, Float> resource : resources.getExtraResourceUsage().entrySet()) {
508+
String key = (String) resource.getKey();
509+
float used = (float) usedExtraResources.getOrDefault(key, 0f);
510+
float requested = resource.getValue();
511+
float available = availableResources.getExtraResourceUsage().get(key);
512+
float epsilon = 0.0001f; // Account for possible rounding errors.
513+
if (requested != 0.0 && used != 0.0 && requested + used > available + epsilon) {
514+
return false;
515+
}
516+
}
517+
return true;
518+
}
519+
457520
// Method will return true if all requested resources are considered to be available.
458521
@VisibleForTesting
459-
boolean areResourcesAvailable(ResourceSet resources) {
522+
boolean areResourcesAvailable(ResourceSet resources) throws NoSuchElementException {
460523
Preconditions.checkNotNull(availableResources);
461524
// Comparison below is robust, since any calculation errors will be fixed
462525
// by the release() method.
@@ -472,7 +535,15 @@ boolean areResourcesAvailable(ResourceSet resources) {
472535
workerKey == null
473536
|| (activeWorkers < availableWorkers && workerPool.couldBeBorrowed(workerKey));
474537

475-
if (usedCpu == 0.0 && usedRam == 0.0 && usedLocalTestCount == 0 && workerIsAvailable) {
538+
// We test for tracking of extra resources whenever acquired and throw an
539+
// exception before acquiring any untracked resource.
540+
assertExtraResourcesTracked(resources);
541+
542+
if (usedCpu == 0.0
543+
&& usedRam == 0.0
544+
&& usedExtraResources.isEmpty()
545+
&& usedLocalTestCount == 0
546+
&& workerIsAvailable) {
476547
return true;
477548
}
478549
// Use only MIN_NECESSARY_???_RATIO of the resource value to check for
@@ -503,7 +574,12 @@ boolean areResourcesAvailable(ResourceSet resources) {
503574
localTestCount == 0
504575
|| usedLocalTestCount == 0
505576
|| usedLocalTestCount + localTestCount <= availableLocalTestCount;
506-
return cpuIsAvailable && ramIsAvailable && localTestCountIsAvailable && workerIsAvailable;
577+
boolean extraResourcesIsAvailable = areExtraResourcesAvailable(resources);
578+
return cpuIsAvailable
579+
&& ramIsAvailable
580+
&& extraResourcesIsAvailable
581+
&& localTestCountIsAvailable
582+
&& workerIsAvailable;
507583
}
508584

509585
@VisibleForTesting

src/main/java/com/google/devtools/build/lib/actions/ResourceSet.java

Lines changed: 58 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,9 @@
1414

1515
package com.google.devtools.build.lib.actions;
1616

17+
import com.google.common.base.Joiner;
1718
import com.google.common.base.Splitter;
19+
import com.google.common.collect.ImmutableMap;
1820
import com.google.common.primitives.Doubles;
1921
import com.google.devtools.build.lib.concurrent.ThreadSafety.Immutable;
2022
import com.google.devtools.build.lib.util.OS;
@@ -43,6 +45,12 @@ public class ResourceSet implements ResourceSetOrBuilder {
4345
/** The number of CPUs, or fractions thereof. */
4446
private final double cpuUsage;
4547

48+
/**
49+
* Map of extra resources (for example: GPUs, embedded boards, ...) mapping name of the resource
50+
* to a value.
51+
*/
52+
private final ImmutableMap<String, Float> extraResourceUsage;
53+
4654
/** The number of local tests. */
4755
private final int localTestCount;
4856

@@ -51,8 +59,18 @@ public class ResourceSet implements ResourceSetOrBuilder {
5159

5260
/** Creates a resource set that requests no extra resources. */
private ResourceSet(
    double memoryMb, double cpuUsage, int localTestCount, @Nullable WorkerKey workerKey) {
  this(memoryMb, cpuUsage, ImmutableMap.of(), localTestCount, workerKey);
}

/**
 * Creates a resource set from all of its components.
 *
 * @param memoryMb value stored as the memory usage
 * @param cpuUsage value stored as the CPU usage
 * @param extraResourceUsage map from extra resource name to amount; {@code null} is stored as an
 *     empty map
 * @param localTestCount value stored as the local test count
 * @param workerKey value stored as the worker key, may be {@code null}
 */
private ResourceSet(
    double memoryMb,
    double cpuUsage,
    @Nullable ImmutableMap<String, Float> extraResourceUsage,
    int localTestCount,
    @Nullable WorkerKey workerKey) {
  this.memoryMb = memoryMb;
  this.cpuUsage = cpuUsage;
  // The parameter is declared @Nullable, but users of getExtraResourceUsage() iterate the map
  // without a null check, so never store null here.
  this.extraResourceUsage =
      extraResourceUsage == null ? ImmutableMap.<String, Float>of() : extraResourceUsage;
  this.localTestCount = localTestCount;
  this.workerKey = workerKey;
}
@@ -83,21 +101,51 @@ public static ResourceSet createWithLocalTestCount(int localTestCount) {
83101
}
84102

85103
/**
86-
* Returns a new ResourceSet with the provided values for memoryMb, cpuUsage, ioUsage, and
87-
* localTestCount. Most action resource definitions should use {@link #createWithRamCpu} or {@link
104+
* Returns a new ResourceSet with the provided values for memoryMb, cpuUsage, and localTestCount.
105+
* Most action resource definitions should use {@link #createWithRamCpu} or {@link
88106
* #createWithLocalTestCount(int)}. Use this method primarily when constructing ResourceSets that
89107
* represent available resources.
90108
*/
91109
public static ResourceSet create(double memoryMb, double cpuUsage, int localTestCount) {
92-
return createWithWorkerKey(memoryMb, cpuUsage, localTestCount, /* workerKey= */ null);
110+
return ResourceSet.createWithWorkerKey(
111+
memoryMb, cpuUsage, ImmutableMap.of(), localTestCount, /* workerKey= */ null);
112+
}
113+
114+
/**
115+
* Returns a new ResourceSet with the provided values for memoryMb, cpuUsage, extraResources, and
116+
* localTestCount. Most action resource definitions should use {@link #createWithRamCpu} or {@link
117+
* #createWithLocalTestCount(int)}. Use this method primarily when constructing ResourceSets that
118+
* represent available resources.
119+
*/
120+
public static ResourceSet create(
121+
double memoryMb,
122+
double cpuUsage,
123+
ImmutableMap<String, Float> extraResourceUsage,
124+
int localTestCount) {
125+
return createWithWorkerKey(
126+
memoryMb, cpuUsage, extraResourceUsage, localTestCount, /* workerKey= */ null);
93127
}
94128

95129
public static ResourceSet createWithWorkerKey(
96130
double memoryMb, double cpuUsage, int localTestCount, WorkerKey workerKey) {
97-
if (memoryMb == 0 && cpuUsage == 0 && localTestCount == 0 && workerKey == null) {
131+
return ResourceSet.createWithWorkerKey(
132+
memoryMb, cpuUsage, /* extraResourceUsage= */ ImmutableMap.of(), localTestCount, workerKey);
133+
}
134+
135+
public static ResourceSet createWithWorkerKey(
136+
double memoryMb,
137+
double cpuUsage,
138+
ImmutableMap<String, Float> extraResourceUsage,
139+
int localTestCount,
140+
WorkerKey workerKey) {
141+
if (memoryMb == 0
142+
&& cpuUsage == 0
143+
&& extraResourceUsage.size() == 0
144+
&& localTestCount == 0
145+
&& workerKey == null) {
98146
return ZERO;
99147
}
100-
return new ResourceSet(memoryMb, cpuUsage, localTestCount, workerKey);
148+
return new ResourceSet(memoryMb, cpuUsage, extraResourceUsage, localTestCount, workerKey);
101149
}
102150

103151
/** Returns the amount of real memory (resident set size) used in MB. */
@@ -124,6 +172,10 @@ public double getCpuUsage() {
124172
return cpuUsage;
125173
}
126174

175+
public ImmutableMap<String, Float> getExtraResourceUsage() {
176+
return extraResourceUsage;
177+
}
178+
127179
/** Returns the local test count used. */
128180
public int getLocalTestCount() {
129181
return localTestCount;
@@ -138,6 +190,7 @@ public String toString() {
138190
+ "CPU: "
139191
+ cpuUsage
140192
+ "\n"
193+
+ Joiner.on("\n").withKeyValueSeparator(": ").join(extraResourceUsage.entrySet())
141194
+ "Local tests: "
142195
+ localTestCount
143196
+ "\n";

0 commit comments

Comments
 (0)