Skip to content

Commit bc54c64

Browse files
Googlercopybara-github
Googler
authored andcommitted
Remote: Use parameters instead of thread-local storage to provide tracing metadata.
Remote Execution API defines RequestMetadata which is an optional message attached to any RPC request to tell the server about an external context of the request. Existing code uses Context object from gRPC which is essentially a thread-local storage to provide metadata: 1. We always attach the Context even the underlying implementation isn't based on gRPC. 2. It is error prone in a multi-threaded context. This is the first step of a series changes where we introduce RemoteActionExecutionContext and update RemoteCacheClient#downloadActionResult to use it. These is a regression that networkTime of SpawnMetrics will not be correct since the NetworkTimeInterceptor is no longer installed at RemoteModule and other methods haven't get updated to use the RemoteActionExecutionContext. The networkTime will back to normal once we finish the cleanup. PiperOrigin-RevId: 353819916
1 parent dafcebf commit bc54c64

23 files changed

+408
-139
lines changed

src/main/java/com/google/devtools/build/lib/remote/GrpcCacheClient.java

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@
4747
import com.google.devtools.build.lib.remote.RemoteRetrier.ProgressiveBackoff;
4848
import com.google.devtools.build.lib.remote.common.CacheNotFoundException;
4949
import com.google.devtools.build.lib.remote.common.MissingDigestsFinder;
50+
import com.google.devtools.build.lib.remote.common.RemoteActionExecutionContext;
5051
import com.google.devtools.build.lib.remote.common.RemoteCacheClient;
5152
import com.google.devtools.build.lib.remote.options.RemoteOptions;
5253
import com.google.devtools.build.lib.remote.util.DigestOutputStream;
@@ -142,9 +143,11 @@ private ActionCacheBlockingStub acBlockingStub() {
142143
.withDeadlineAfter(options.remoteTimeout.getSeconds(), TimeUnit.SECONDS);
143144
}
144145

145-
private ActionCacheFutureStub acFutureStub() {
146+
private ActionCacheFutureStub acFutureStub(RemoteActionExecutionContext context) {
146147
return ActionCacheGrpc.newFutureStub(channel)
147-
.withInterceptors(TracingMetadataUtils.attachMetadataFromContextInterceptor())
148+
.withInterceptors(
149+
TracingMetadataUtils.attachMetadataInterceptor(context.getRequestMetadata()),
150+
new NetworkTimeInterceptor(context::getNetworkTime))
148151
.withCallCredentials(callCredentialsProvider.getCallCredentials())
149152
.withDeadlineAfter(options.remoteTimeout.getSeconds(), TimeUnit.SECONDS);
150153
}
@@ -240,19 +243,18 @@ private ListenableFuture<ActionResult> handleStatus(ListenableFuture<ActionResul
240243

241244
@Override
242245
public ListenableFuture<ActionResult> downloadActionResult(
243-
ActionKey actionKey, boolean inlineOutErr) {
246+
RemoteActionExecutionContext context, ActionKey actionKey, boolean inlineOutErr) {
244247
GetActionResultRequest request =
245248
GetActionResultRequest.newBuilder()
246249
.setInstanceName(options.remoteInstanceName)
247250
.setActionDigest(actionKey.getDigest())
248251
.setInlineStderr(inlineOutErr)
249252
.setInlineStdout(inlineOutErr)
250253
.build();
251-
Context ctx = Context.current();
252254
return Utils.refreshIfUnauthenticatedAsync(
253255
() ->
254256
retrier.executeAsync(
255-
() -> ctx.call(() -> handleStatus(acFutureStub().getActionResult(request)))),
257+
() -> handleStatus(acFutureStub(context).getActionResult(request))),
256258
callCredentialsProvider);
257259
}
258260

src/main/java/com/google/devtools/build/lib/remote/util/NetworkTime.java renamed to src/main/java/com/google/devtools/build/lib/remote/NetworkTimeInterceptor.java

Lines changed: 19 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// Copyright 2019 The Bazel Authors. All rights reserved.
1+
// Copyright 2020 The Bazel Authors. All rights reserved.
22
//
33
// Licensed under the Apache License, Version 2.0 (the "License");
44
// you may not use this file except in compliance with the License.
@@ -11,12 +11,10 @@
1111
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1212
// See the License for the specific language governing permissions and
1313
// limitations under the License.
14-
package com.google.devtools.build.lib.remote.util;
14+
package com.google.devtools.build.lib.remote;
1515

1616
import build.bazel.remote.execution.v2.ExecutionGrpc;
17-
import com.google.common.base.MoreObjects;
18-
import com.google.common.base.Stopwatch;
19-
import com.google.devtools.build.lib.concurrent.ThreadSafety;
17+
import com.google.devtools.build.lib.remote.common.NetworkTime;
2018
import io.grpc.CallOptions;
2119
import io.grpc.Channel;
2220
import io.grpc.ClientCall;
@@ -27,59 +25,31 @@
2725
import io.grpc.Metadata;
2826
import io.grpc.MethodDescriptor;
2927
import io.grpc.Status;
30-
import java.time.Duration;
28+
import java.util.function.Supplier;
3129

32-
/** Reentrant wall clock stopwatch and grpc interceptor for network waits. */
33-
@ThreadSafety.ThreadSafe
34-
public class NetworkTime {
30+
/** The ClientInterceptor used to track network time. */
31+
public class NetworkTimeInterceptor implements ClientInterceptor {
3532

3633
public static final Context.Key<NetworkTime> CONTEXT_KEY = Context.key("remote-network-time");
34+
private final Supplier<NetworkTime> networkTimeSupplier;
3735

38-
private final Stopwatch wallTime = Stopwatch.createUnstarted();
39-
private int outstanding = 0;
40-
41-
private synchronized void start() {
42-
if (!wallTime.isRunning()) {
43-
wallTime.start();
44-
}
45-
outstanding++;
46-
}
47-
48-
private synchronized void stop() {
49-
if (--outstanding == 0) {
50-
wallTime.stop();
51-
}
52-
}
53-
54-
public Duration getDuration() {
55-
return wallTime.elapsed();
36+
public NetworkTimeInterceptor(Supplier<NetworkTime> networkTimeSupplier) {
37+
this.networkTimeSupplier = networkTimeSupplier;
5638
}
5739

5840
@Override
59-
public String toString() {
60-
return MoreObjects.toStringHelper(this)
61-
.add("outstanding", outstanding)
62-
.add("wallTime", wallTime)
63-
.add("wallTime.isRunning", wallTime.isRunning())
64-
.toString();
65-
}
66-
67-
/** The ClientInterceptor used to track network time. */
68-
public static class Interceptor implements ClientInterceptor {
69-
@Override
70-
public <ReqT, RespT> ClientCall<ReqT, RespT> interceptCall(
71-
MethodDescriptor<ReqT, RespT> method, CallOptions callOptions, Channel next) {
72-
ClientCall<ReqT, RespT> call = next.newCall(method, callOptions);
73-
// prevent accounting for execution wait time
74-
if (method != ExecutionGrpc.getExecuteMethod()
75-
&& method != ExecutionGrpc.getWaitExecutionMethod()) {
76-
NetworkTime networkTime = CONTEXT_KEY.get();
77-
if (networkTime != null) {
78-
call = new NetworkTimeCall<>(call, networkTime);
79-
}
41+
public <ReqT, RespT> ClientCall<ReqT, RespT> interceptCall(
42+
MethodDescriptor<ReqT, RespT> method, CallOptions callOptions, Channel next) {
43+
ClientCall<ReqT, RespT> call = next.newCall(method, callOptions);
44+
// prevent accounting for execution wait time
45+
if (method != ExecutionGrpc.getExecuteMethod()
46+
&& method != ExecutionGrpc.getWaitExecutionMethod()) {
47+
NetworkTime networkTime = networkTimeSupplier.get();
48+
if (networkTime != null) {
49+
call = new NetworkTimeCall<>(call, networkTime);
8050
}
81-
return call;
8251
}
52+
return call;
8353
}
8454

8555
private static class NetworkTimeCall<ReqT, RespT>

src/main/java/com/google/devtools/build/lib/remote/RemoteCache.java

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@
5555
import com.google.devtools.build.lib.remote.RemoteCache.ActionResultMetadata.DirectoryMetadata;
5656
import com.google.devtools.build.lib.remote.RemoteCache.ActionResultMetadata.FileMetadata;
5757
import com.google.devtools.build.lib.remote.RemoteCache.ActionResultMetadata.SymlinkMetadata;
58+
import com.google.devtools.build.lib.remote.common.RemoteActionExecutionContext;
5859
import com.google.devtools.build.lib.remote.common.RemoteActionFileArtifactValue;
5960
import com.google.devtools.build.lib.remote.common.RemoteCacheClient;
6061
import com.google.devtools.build.lib.remote.common.RemoteCacheClient.ActionKey;
@@ -115,9 +116,10 @@ public RemoteCache(
115116
this.digestUtil = digestUtil;
116117
}
117118

118-
public ActionResult downloadActionResult(ActionKey actionKey, boolean inlineOutErr)
119+
public ActionResult downloadActionResult(
120+
RemoteActionExecutionContext context, ActionKey actionKey, boolean inlineOutErr)
119121
throws IOException, InterruptedException {
120-
return getFromFuture(cacheProtocol.downloadActionResult(actionKey, inlineOutErr));
122+
return getFromFuture(cacheProtocol.downloadActionResult(context, actionKey, inlineOutErr));
121123
}
122124

123125
/**

src/main/java/com/google/devtools/build/lib/remote/RemoteModule.java

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,6 @@
6868
import com.google.devtools.build.lib.remote.options.RemoteOptions;
6969
import com.google.devtools.build.lib.remote.options.RemoteOutputsMode;
7070
import com.google.devtools.build.lib.remote.util.DigestUtil;
71-
import com.google.devtools.build.lib.remote.util.NetworkTime;
7271
import com.google.devtools.build.lib.remote.util.TracingMetadataUtils;
7372
import com.google.devtools.build.lib.remote.util.Utils;
7473
import com.google.devtools.build.lib.runtime.BlazeModule;
@@ -330,7 +329,6 @@ public void beforeCommand(CommandEnvironment env) throws AbruptExitException {
330329
if (loggingInterceptor != null) {
331330
interceptors.add(loggingInterceptor);
332331
}
333-
interceptors.add(new NetworkTime.Interceptor());
334332
try {
335333
execChannel =
336334
RemoteCacheClientFactory.createGrpcChannelPool(
@@ -357,7 +355,6 @@ public void beforeCommand(CommandEnvironment env) throws AbruptExitException {
357355
if (loggingInterceptor != null) {
358356
interceptors.add(loggingInterceptor);
359357
}
360-
interceptors.add(new NetworkTime.Interceptor());
361358
try {
362359
cacheChannel =
363360
RemoteCacheClientFactory.createGrpcChannelPool(
@@ -551,7 +548,9 @@ public void beforeCommand(CommandEnvironment env) throws AbruptExitException {
551548
remoteCache,
552549
remoteExecutor,
553550
digestUtil,
554-
repoContext,
551+
buildRequestId,
552+
invocationId,
553+
"repository_rule",
555554
remoteOptions.remoteInstanceName,
556555
remoteOptions.remoteAcceptCached));
557556
} else {

src/main/java/com/google/devtools/build/lib/remote/RemoteRepositoryRemoteExecutor.java

Lines changed: 22 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -28,11 +28,15 @@
2828
import com.google.devtools.build.lib.profiler.Profiler;
2929
import com.google.devtools.build.lib.profiler.ProfilerTask;
3030
import com.google.devtools.build.lib.profiler.SilentCloseable;
31+
import com.google.devtools.build.lib.remote.common.NetworkTime;
3132
import com.google.devtools.build.lib.remote.common.OperationObserver;
33+
import com.google.devtools.build.lib.remote.common.RemoteActionExecutionContext;
34+
import com.google.devtools.build.lib.remote.common.RemoteActionExecutionContextImpl;
3235
import com.google.devtools.build.lib.remote.common.RemoteCacheClient.ActionKey;
3336
import com.google.devtools.build.lib.remote.common.RemoteExecutionClient;
3437
import com.google.devtools.build.lib.remote.merkletree.MerkleTree;
3538
import com.google.devtools.build.lib.remote.util.DigestUtil;
39+
import com.google.devtools.build.lib.remote.util.TracingMetadataUtils;
3640
import com.google.devtools.build.lib.remote.util.Utils;
3741
import com.google.devtools.build.lib.runtime.RepositoryRemoteExecutor;
3842
import com.google.devtools.build.lib.vfs.Path;
@@ -49,7 +53,9 @@ public class RemoteRepositoryRemoteExecutor implements RepositoryRemoteExecutor
4953
private final RemoteExecutionCache remoteCache;
5054
private final RemoteExecutionClient remoteExecutor;
5155
private final DigestUtil digestUtil;
52-
private final Context requestCtx;
56+
private final String buildRequestId;
57+
private final String commandId;
58+
private final String actionId;
5359

5460
private final String remoteInstanceName;
5561
private final boolean acceptCached;
@@ -58,13 +64,17 @@ public RemoteRepositoryRemoteExecutor(
5864
RemoteExecutionCache remoteCache,
5965
RemoteExecutionClient remoteExecutor,
6066
DigestUtil digestUtil,
61-
Context requestCtx,
67+
String buildRequestId,
68+
String commandId,
69+
String actionId,
6270
String remoteInstanceName,
6371
boolean acceptCached) {
6472
this.remoteCache = remoteCache;
6573
this.remoteExecutor = remoteExecutor;
6674
this.digestUtil = digestUtil;
67-
this.requestCtx = requestCtx;
75+
this.buildRequestId = buildRequestId;
76+
this.commandId = commandId;
77+
this.actionId = actionId;
6878
this.remoteInstanceName = remoteInstanceName;
6979
this.acceptCached = acceptCached;
7080
}
@@ -100,6 +110,8 @@ public ExecutionResult execute(
100110
String workingDirectory,
101111
Duration timeout)
102112
throws IOException, InterruptedException {
113+
Context requestCtx =
114+
TracingMetadataUtils.contextWithMetadata(buildRequestId, commandId, actionId);
103115
Context prev = requestCtx.attach();
104116
try {
105117
Platform platform = PlatformUtils.buildPlatformProto(executionProperties);
@@ -117,10 +129,16 @@ public ExecutionResult execute(
117129
commandHash, merkleTree.getRootDigest(), timeout, acceptCached);
118130
Digest actionDigest = digestUtil.compute(action);
119131
ActionKey actionKey = new ActionKey(actionDigest);
132+
RemoteActionExecutionContext remoteActionExecutionContext =
133+
new RemoteActionExecutionContextImpl(
134+
TracingMetadataUtils.buildMetadata(buildRequestId, commandId, actionId),
135+
new NetworkTime());
120136
ActionResult actionResult;
121137
try (SilentCloseable c =
122138
Profiler.instance().profile(ProfilerTask.REMOTE_CACHE_CHECK, "check cache hit")) {
123-
actionResult = remoteCache.downloadActionResult(actionKey, /* inlineOutErr= */ true);
139+
actionResult =
140+
remoteCache.downloadActionResult(
141+
remoteActionExecutionContext, actionKey, /* inlineOutErr= */ true);
124142
}
125143
if (actionResult == null || actionResult.getExitCode() != 0) {
126144
try (SilentCloseable c =

src/main/java/com/google/devtools/build/lib/remote/RemoteRepositoryRemoteExecutorFactory.java

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -17,15 +17,16 @@
1717
import com.google.devtools.build.lib.remote.util.DigestUtil;
1818
import com.google.devtools.build.lib.runtime.RepositoryRemoteExecutor;
1919
import com.google.devtools.build.lib.runtime.RepositoryRemoteExecutorFactory;
20-
import io.grpc.Context;
2120

2221
/** Factory for {@link RemoteRepositoryRemoteExecutor}. */
2322
class RemoteRepositoryRemoteExecutorFactory implements RepositoryRemoteExecutorFactory {
2423

2524
private final RemoteExecutionCache remoteExecutionCache;
2625
private final RemoteExecutionClient remoteExecutor;
2726
private final DigestUtil digestUtil;
28-
private final Context requestCtx;
27+
private final String buildRequestId;
28+
private final String commandId;
29+
private final String actionId;
2930

3031
private final String remoteInstanceName;
3132
private final boolean acceptCached;
@@ -34,13 +35,17 @@ class RemoteRepositoryRemoteExecutorFactory implements RepositoryRemoteExecutorF
3435
RemoteExecutionCache remoteExecutionCache,
3536
RemoteExecutionClient remoteExecutor,
3637
DigestUtil digestUtil,
37-
Context requestCtx,
38+
String buildRequestId,
39+
String commandId,
40+
String actionId,
3841
String remoteInstanceName,
3942
boolean acceptCached) {
4043
this.remoteExecutionCache = remoteExecutionCache;
4144
this.remoteExecutor = remoteExecutor;
4245
this.digestUtil = digestUtil;
43-
this.requestCtx = requestCtx;
46+
this.buildRequestId = buildRequestId;
47+
this.commandId = commandId;
48+
this.actionId = actionId;
4449
this.remoteInstanceName = remoteInstanceName;
4550
this.acceptCached = acceptCached;
4651
}
@@ -51,7 +56,9 @@ public RepositoryRemoteExecutor create() {
5156
remoteExecutionCache,
5257
remoteExecutor,
5358
digestUtil,
54-
requestCtx,
59+
buildRequestId,
60+
commandId,
61+
actionId,
5562
remoteInstanceName,
5663
acceptCached);
5764
}

src/main/java/com/google/devtools/build/lib/remote/RemoteSpawnCache.java

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -48,12 +48,14 @@
4848
import com.google.devtools.build.lib.profiler.ProfilerTask;
4949
import com.google.devtools.build.lib.profiler.SilentCloseable;
5050
import com.google.devtools.build.lib.remote.common.CacheNotFoundException;
51+
import com.google.devtools.build.lib.remote.common.NetworkTime;
52+
import com.google.devtools.build.lib.remote.common.RemoteActionExecutionContext;
53+
import com.google.devtools.build.lib.remote.common.RemoteActionExecutionContextImpl;
5154
import com.google.devtools.build.lib.remote.common.RemoteCacheClient.ActionKey;
5255
import com.google.devtools.build.lib.remote.merkletree.MerkleTree;
5356
import com.google.devtools.build.lib.remote.options.RemoteOptions;
5457
import com.google.devtools.build.lib.remote.options.RemoteOutputsMode;
5558
import com.google.devtools.build.lib.remote.util.DigestUtil;
56-
import com.google.devtools.build.lib.remote.util.NetworkTime;
5759
import com.google.devtools.build.lib.remote.util.TracingMetadataUtils;
5860
import com.google.devtools.build.lib.remote.util.Utils;
5961
import com.google.devtools.build.lib.remote.util.Utils.InMemoryOutput;
@@ -151,9 +153,15 @@ public CacheHandle lookup(Spawn spawn, SpawnExecutionContext context)
151153
digestUtil.compute(command), merkleTreeRoot, context.getTimeout(), true);
152154
// Look up action cache, and reuse the action output if it is found.
153155
ActionKey actionKey = digestUtil.computeActionKey(action);
156+
157+
RemoteActionExecutionContext remoteActionExecutionContext =
158+
new RemoteActionExecutionContextImpl(
159+
TracingMetadataUtils.buildMetadata(
160+
buildRequestId, commandId, actionKey.getDigest().getHash()),
161+
networkTime);
154162
Context withMetadata =
155163
TracingMetadataUtils.contextWithMetadata(buildRequestId, commandId, actionKey)
156-
.withValue(NetworkTime.CONTEXT_KEY, networkTime);
164+
.withValue(NetworkTimeInterceptor.CONTEXT_KEY, networkTime);
157165

158166
Profiler prof = Profiler.instance();
159167
if (options.remoteAcceptCached
@@ -165,7 +173,9 @@ public CacheHandle lookup(Spawn spawn, SpawnExecutionContext context)
165173
try {
166174
ActionResult result;
167175
try (SilentCloseable c = prof.profile(ProfilerTask.REMOTE_CACHE_CHECK, "check cache hit")) {
168-
result = remoteCache.downloadActionResult(actionKey, /* inlineOutErr= */ false);
176+
result =
177+
remoteCache.downloadActionResult(
178+
remoteActionExecutionContext, actionKey, /* inlineOutErr= */ false);
169179
}
170180
// In case the remote cache returned a failed action (exit code != 0) we treat it as a
171181
// cache miss

0 commit comments

Comments
 (0)