File tree Expand file tree Collapse file tree 3 files changed +23
-0
lines changed
tensorflow/core/distributed_runtime/rpc Expand file tree Collapse file tree 3 files changed +23
-0
lines changed Original file line number Diff line number Diff line change @@ -32,6 +32,13 @@ GrpcEagerServiceImpl::GrpcEagerServiceImpl(
32
32
local_impl_ (env),
33
33
enqueue_streaming_thread_(env_->env, " enqueue_streaming_thread" , 1 ) {
34
34
server_builder->RegisterService (&service_);
35
+ // gRPC by default will cancel requests that sit in a completion queue for
36
+ // more than 30s. See
37
+ // https://github.com/grpc/grpc/blob/e52e48b7ef83feeff56ed0894ce39841ea8bd483/include/grpc/impl/channel_arg_names.h#L106-L111
38
+ // Extending this to 1 hour for TensorFlow since some graphs may have periods
39
+ // of heavy load which may cause the server to run into these cancellations.
40
+ server_builder->AddChannelArgument (
41
+ " grpc.server_max_unrequested_time_in_server" , 3600 );
35
42
cq_ = server_builder->AddCompletionQueue ();
36
43
}
37
44
Original file line number Diff line number Diff line change @@ -54,6 +54,14 @@ class GrpcMasterService : public tsl::AsyncServiceInterface {
54
54
is_shutdown_ (false ),
55
55
default_session_config_(default_session_config) {
56
56
builder->RegisterService (&master_service_);
57
+ // gRPC by default will cancel requests that sit in a completion queue for
58
+ // more than 30s. See
59
+ // https://github.com/grpc/grpc/blob/e52e48b7ef83feeff56ed0894ce39841ea8bd483/include/grpc/impl/channel_arg_names.h#L106-L111
60
+ // Extending this to 1 hour for TensorFlow since some graphs may have
61
+ // periods of heavy load which may cause the server to run into these
62
+ // cancellations.
63
+ builder->AddChannelArgument (" grpc.server_max_unrequested_time_in_server" ,
64
+ 3600 );
57
65
cq_ = builder->AddCompletionQueue ();
58
66
}
59
67
Original file line number Diff line number Diff line change @@ -371,6 +371,14 @@ class GrpcWorkerService : public tsl::AsyncServiceInterface {
371
371
GrpcWorkerServiceOptions options)
372
372
: is_shutdown_(false ) {
373
373
builder->RegisterService (&worker_service_);
374
+ // gRPC by default will cancel requests that sit in a completion queue for
375
+ // more than 30s. See
376
+ // https://github.com/grpc/grpc/blob/e52e48b7ef83feeff56ed0894ce39841ea8bd483/include/grpc/impl/channel_arg_names.h#L106-L111
377
+ // Extending this to 1 hour for TensorFlow since some graphs may have
378
+ // periods of heavy load which may cause the server to run into these
379
+ // cancellations.
380
+ builder->AddChannelArgument (" grpc.server_max_unrequested_time_in_server" ,
381
+ 3600 );
374
382
375
383
for (int i = 0 ; i < options.num_serving_threads ; i++) {
376
384
threads_.emplace_back (new GrpcWorkerServiceThread (
You can’t perform that action at this time.
0 commit comments