Skip to content

Commit b1476ba

Browse files
authored
Merge pull request #21731 from heejaechang/lowPriorityTimeout
Added retry on OOP start up
2 parents b735460 + 3009caa commit b1476ba

File tree

4 files changed

+104
-59
lines changed

4 files changed

+104
-59
lines changed

src/VisualStudio/Core/Next/Remote/ServiceHubRemoteHostClient.cs

+88-51
Original file line numberDiff line numberDiff line change
@@ -36,23 +36,14 @@ public static async Task<RemoteHostClient> CreateAsync(
3636
{
3737
using (Logger.LogBlock(FunctionId.ServiceHubRemoteHostClient_CreateAsync, cancellationToken))
3838
{
39-
// let each client to have unique id so that we can distinguish different clients when service is restarted
40-
var currentInstanceId = Interlocked.Add(ref s_instanceId, 1);
41-
4239
var primary = new HubClient("ManagedLanguage.IDE.RemoteHostClient");
43-
var current = $"VS ({Process.GetCurrentProcess().Id}) ({currentInstanceId})";
44-
45-
var hostGroup = new HostGroup(current);
4640
var timeout = TimeSpan.FromMilliseconds(workspace.Options.GetOption(RemoteHostOptions.RequestServiceTimeoutInMS));
47-
var remoteHostStream = await RequestServiceAsync(primary, WellKnownRemoteHostServices.RemoteHostService, hostGroup, timeout, cancellationToken).ConfigureAwait(false);
48-
49-
var instance = new ServiceHubRemoteHostClient(workspace, primary, hostGroup, remoteHostStream);
50-
51-
// make sure connection is done right
52-
var host = await instance._rpc.InvokeAsync<string>(nameof(IRemoteHostService.Connect), current, TelemetryService.DefaultSession.SerializeSettings()).ConfigureAwait(false);
5341

54-
// TODO: change this to non fatal watson and make VS to use inproc implementation
55-
Contract.ThrowIfFalse(host == current.ToString());
42+
// Retry (with timeout) until we can connect to RemoteHost (service hub process).
43+
// we are seeing cases where we failed to connect to service hub process when a machine is under heavy load.
44+
// (see https://devdiv.visualstudio.com/DevDiv/_workitems/edit/481103 as one of example)
45+
var instance = await RetryRemoteCallAsync<IOException, ServiceHubRemoteHostClient>(
46+
() => CreateWorkerAsync(workspace, primary, timeout, cancellationToken), timeout, cancellationToken).ConfigureAwait(false);
5647

5748
instance.Connected();
5849

@@ -65,6 +56,43 @@ public static async Task<RemoteHostClient> CreateAsync(
6556
}
6657
}
6758

59+
public static async Task<ServiceHubRemoteHostClient> CreateWorkerAsync(Workspace workspace, HubClient primary, TimeSpan timeout, CancellationToken cancellationToken)
60+
{
61+
ServiceHubRemoteHostClient client = null;
62+
try
63+
{
64+
// let each client to have unique id so that we can distinguish different clients when service is restarted
65+
var currentInstanceId = Interlocked.Add(ref s_instanceId, 1);
66+
67+
var current = $"VS ({Process.GetCurrentProcess().Id}) ({currentInstanceId})";
68+
69+
var hostGroup = new HostGroup(current);
70+
var remoteHostStream = await RequestServiceAsync(
71+
primary, WellKnownRemoteHostServices.RemoteHostService, hostGroup, timeout, cancellationToken).ConfigureAwait(false);
72+
73+
client = new ServiceHubRemoteHostClient(workspace, primary, hostGroup, remoteHostStream);
74+
75+
await client._rpc.InvokeWithCancellationAsync<string>(
76+
nameof(IRemoteHostService.Connect),
77+
new object[] { current, TelemetryService.DefaultSession.SerializeSettings() },
78+
cancellationToken).ConfigureAwait(false);
79+
80+
return client;
81+
}
82+
catch (Exception ex)
83+
{
84+
// make sure we shutdown client if initializing client has failed.
85+
client?.Shutdown();
86+
87+
// translate to our own cancellation if it is raised.
88+
cancellationToken.ThrowIfCancellationRequested();
89+
90+
// otherwise, report watson and throw original exception
91+
WatsonReporter.Report("ServiceHub creation failed", ex, ReportDetailInfo);
92+
throw;
93+
}
94+
}
95+
6896
private static async Task RegisterWorkspaceHostAsync(Workspace workspace, RemoteHostClient client)
6997
{
7098
var vsWorkspace = workspace as VisualStudioWorkspaceImpl;
@@ -88,7 +116,7 @@ await Task.Factory.SafeStartNew(() =>
88116

89117
private ServiceHubRemoteHostClient(
90118
Workspace workspace, HubClient hubClient, HostGroup hostGroup, Stream stream) :
91-
base(workspace)
119+
base(workspace)
92120
{
93121
_hubClient = hubClient;
94122
_hostGroup = hostGroup;
@@ -136,6 +164,40 @@ private void OnRpcDisconnected(object sender, JsonRpcDisconnectedEventArgs e)
136164
Disconnected();
137165
}
138166

167+
/// <summary>
168+
/// call <paramref name="funcAsync"/> and retry up to <paramref name="timeout"/> if the call throws
169+
/// <typeparamref name="TException"/>. any other exception from the call won't be handled here.
170+
/// </summary>
171+
private static async Task<TResult> RetryRemoteCallAsync<TException, TResult>(
172+
Func<Task<TResult>> funcAsync,
173+
TimeSpan timeout,
174+
CancellationToken cancellationToken) where TException : Exception
175+
{
176+
const int retry_delayInMS = 50;
177+
178+
var start = DateTime.UtcNow;
179+
while (DateTime.UtcNow - start < timeout)
180+
{
181+
cancellationToken.ThrowIfCancellationRequested();
182+
183+
try
184+
{
185+
return await funcAsync().ConfigureAwait(false);
186+
}
187+
catch (TException)
188+
{
189+
// throw cancellation token if operation is cancelled
190+
cancellationToken.ThrowIfCancellationRequested();
191+
}
192+
193+
// wait for retry_delayInMS before next try
194+
await Task.Delay(retry_delayInMS, cancellationToken).ConfigureAwait(false);
195+
}
196+
197+
// operation timed out, more than we are willing to wait
198+
throw new TimeoutException("RequestServiceAsync timed out");
199+
}
200+
139201
private static async Task<Stream> RequestServiceAsync(
140202
HubClient client,
141203
string serviceName,
@@ -156,7 +218,17 @@ private static async Task<Stream> RequestServiceAsync(
156218
{
157219
try
158220
{
159-
return await RequestServiceAsync(client, descriptor, timeout, cancellationToken).ConfigureAwait(false);
221+
// we are wrapping HubClient.RequestServiceAsync since we can't control its internal timeout value ourselves.
222+
// we have bug opened to track the issue.
223+
// https://devdiv.visualstudio.com/DefaultCollection/DevDiv/Editor/_workitems?id=378757&fullScreen=false&_a=edit
224+
225+
// retry on cancellation token since HubClient will throw its own cancellation token
226+
// when it couldn't connect to service hub service for some reasons
227+
// (ex, OOP process GC blocked and not responding to request)
228+
return await RetryRemoteCallAsync<OperationCanceledException, Stream>(
229+
() => client.RequestServiceAsync(descriptor, cancellationToken),
230+
timeout,
231+
cancellationToken).ConfigureAwait(false);
160232
}
161233
catch (RemoteInvocationException ex)
162234
{
@@ -184,41 +256,6 @@ private static async Task<Stream> RequestServiceAsync(
184256
throw ExceptionUtilities.Unreachable;
185257
}
186258

187-
private static async Task<Stream> RequestServiceAsync(HubClient client, ServiceDescriptor descriptor, TimeSpan timeout, CancellationToken cancellationToken = default(CancellationToken))
188-
{
189-
// we are wrapping HubClient.RequestServiceAsync since we can't control its internal timeout value ourselves.
190-
// we have bug opened to track the issue.
191-
// https://devdiv.visualstudio.com/DefaultCollection/DevDiv/Editor/_workitems?id=378757&fullScreen=false&_a=edit
192-
const int retry_delayInMS = 50;
193-
194-
var start = DateTime.UtcNow;
195-
while (start - DateTime.UtcNow < timeout)
196-
{
197-
cancellationToken.ThrowIfCancellationRequested();
198-
199-
try
200-
{
201-
return await client.RequestServiceAsync(descriptor, cancellationToken).ConfigureAwait(false);
202-
}
203-
catch (OperationCanceledException)
204-
{
205-
// if it is our own cancellation token, then rethrow
206-
// otherwise, let us retry.
207-
//
208-
// we do this since HubClient itself can throw its own cancellation token
209-
// when it couldn't connect to service hub service for some reasons
210-
// (ex, OOP process GC blocked and not responding to request)
211-
cancellationToken.ThrowIfCancellationRequested();
212-
}
213-
214-
// wait for retry_delayInMS before next try
215-
await Task.Delay(retry_delayInMS, cancellationToken).ConfigureAwait(false);
216-
}
217-
218-
// request service to HubClient timed out, more than we are willing to wait
219-
throw new TimeoutException("RequestServiceAsync timed out");
220-
}
221-
222259
private static int ReportDetailInfo(IFaultUtility faultUtility)
223260
{
224261
// 0 means send watson, otherwise, cancel watson

src/VisualStudio/Core/Test.Next/Services/ServiceHubServicesTests.cs

+1-1
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ public void TestRemoteHostConnect()
3737
var remoteHostService = CreateService();
3838

3939
var input = "Test";
40-
var output = remoteHostService.Connect(input, serializedSession: null);
40+
var output = remoteHostService.Connect(input, serializedSession: null, cancellationToken: CancellationToken.None);
4141

4242
Assert.Equal(input, output);
4343
}

src/Workspaces/Core/Portable/Remote/IRemoteHostService.cs

+2-1
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,13 @@
11
// Copyright (c) Microsoft. All Rights Reserved. Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
22

3+
using System.Threading;
34
using System.Threading.Tasks;
45

56
namespace Microsoft.CodeAnalysis.Remote
67
{
78
internal interface IRemoteHostService
89
{
9-
string Connect(string host, string serializedSession);
10+
string Connect(string host, string serializedSession, CancellationToken cancellationToken);
1011
Task SynchronizePrimaryWorkspaceAsync(Checksum checksum);
1112
Task SynchronizeGlobalAssetsAsync(Checksum[] checksums);
1213

src/Workspaces/Remote/ServiceHub/Services/RemoteHostService.cs

+13-6
Original file line numberDiff line numberDiff line change
@@ -41,11 +41,6 @@ static RemoteHostService()
4141
// we set up logger here
4242
RoslynLogger.SetLogger(new EtwLogger(GetLoggingChecker()));
4343

44-
// Set this process's priority BelowNormal.
45-
// this should let us to freely try to use all resources possible without worrying about affecting
46-
// host's work such as responsiveness or build.
47-
Process.GetCurrentProcess().PriorityClass = ProcessPriorityClass.BelowNormal;
48-
4944
SetNativeDllSearchDirectories();
5045
}
5146

@@ -56,8 +51,11 @@ public RemoteHostService(Stream stream, IServiceProvider serviceProvider) :
5651
Rpc.StartListening();
5752
}
5853

59-
public string Connect(string host, string serializedSession)
54+
public string Connect(string host, string serializedSession, CancellationToken cancellationToken)
6055
{
56+
cancellationToken.ThrowIfCancellationRequested();
57+
58+
// this is called only once when Host (VS) started RemoteHost (OOP)
6159
_primaryInstance = InstanceId;
6260

6361
var existing = Interlocked.CompareExchange(ref _host, host, null);
@@ -72,6 +70,15 @@ public string Connect(string host, string serializedSession)
7270
// log telemetry that service hub started
7371
RoslynLogger.Log(FunctionId.RemoteHost_Connect, KeyValueLogMessage.Create(SetSessionInfo));
7472

73+
// serializedSession will be null for testing
74+
if (serializedSession != null)
75+
{
76+
// Set this process's priority BelowNormal.
77+
// this should let us to freely try to use all resources possible without worrying about affecting
78+
// host's work such as responsiveness or build.
79+
Process.GetCurrentProcess().PriorityClass = ProcessPriorityClass.BelowNormal;
80+
}
81+
7582
return _host;
7683
}
7784

0 commit comments

Comments
 (0)