@@ -36,23 +36,14 @@ public static async Task<RemoteHostClient> CreateAsync(
36
36
{
37
37
using ( Logger . LogBlock ( FunctionId . ServiceHubRemoteHostClient_CreateAsync , cancellationToken ) )
38
38
{
39
- // let each client to have unique id so that we can distinguish different clients when service is restarted
40
- var currentInstanceId = Interlocked . Add ( ref s_instanceId , 1 ) ;
41
-
42
39
var primary = new HubClient ( "ManagedLanguage.IDE.RemoteHostClient" ) ;
43
- var current = $ "VS ({ Process . GetCurrentProcess ( ) . Id } ) ({ currentInstanceId } )";
44
-
45
- var hostGroup = new HostGroup ( current ) ;
46
40
var timeout = TimeSpan . FromMilliseconds ( workspace . Options . GetOption ( RemoteHostOptions . RequestServiceTimeoutInMS ) ) ;
47
- var remoteHostStream = await RequestServiceAsync ( primary , WellKnownRemoteHostServices . RemoteHostService , hostGroup , timeout , cancellationToken ) . ConfigureAwait ( false ) ;
48
-
49
- var instance = new ServiceHubRemoteHostClient ( workspace , primary , hostGroup , remoteHostStream ) ;
50
-
51
- // make sure connection is done right
52
- var host = await instance . _rpc . InvokeAsync < string > ( nameof ( IRemoteHostService . Connect ) , current , TelemetryService . DefaultSession . SerializeSettings ( ) ) . ConfigureAwait ( false ) ;
53
41
54
- // TODO: change this to non fatal watson and make VS to use inproc implementation
55
- Contract . ThrowIfFalse ( host == current . ToString ( ) ) ;
42
+ // Retry (with timeout) until we can connect to RemoteHost (service hub process).
43
+ // we are seeing cases where we failed to connect to service hub process when a machine is under heavy load.
44
+ // (see https://devdiv.visualstudio.com/DevDiv/_workitems/edit/481103 as one of example)
45
+ var instance = await RetryRemoteCallAsync < IOException , ServiceHubRemoteHostClient > (
46
+ ( ) => CreateWorkerAsync ( workspace , primary , timeout , cancellationToken ) , timeout , cancellationToken ) . ConfigureAwait ( false ) ;
56
47
57
48
instance . Connected ( ) ;
58
49
@@ -65,6 +56,43 @@ public static async Task<RemoteHostClient> CreateAsync(
65
56
}
66
57
}
67
58
59
/// <summary>
/// Makes one attempt to create a connected <see cref="ServiceHubRemoteHostClient"/>: requests the
/// remote host service stream through <paramref name="primary"/>, wraps it in a new client and
/// invokes <see cref="IRemoteHostService.Connect"/> on the remote side.
/// </summary>
/// <param name="workspace">workspace handed to the new client.</param>
/// <param name="primary">hub client used to request the service; also handed to the new client.</param>
/// <param name="timeout">timeout forwarded to the service request.</param>
/// <param name="cancellationToken">token observed by the service request and by the Connect call.</param>
/// <returns>the client, once the Connect call has completed.</returns>
/// <remarks>
/// On any failure the partially created client (or, if no client was created yet, the raw stream)
/// is cleaned up. If <paramref name="cancellationToken"/> has been cancelled, cancellation is thrown;
/// otherwise a watson report is filed and the original exception is rethrown.
/// </remarks>
public static async Task<ServiceHubRemoteHostClient> CreateWorkerAsync(Workspace workspace, HubClient primary, TimeSpan timeout, CancellationToken cancellationToken)
{
    // tracked outside of the try block so that the catch block can release it
    // when the client constructor never got a chance to take it over.
    Stream remoteHostStream = null;
    ServiceHubRemoteHostClient client = null;
    try
    {
        // let each client have a unique id so that we can distinguish different clients when service is restarted
        var currentInstanceId = Interlocked.Add(ref s_instanceId, 1);

        var current = $"VS ({Process.GetCurrentProcess().Id}) ({currentInstanceId})";

        var hostGroup = new HostGroup(current);
        remoteHostStream = await RequestServiceAsync(
            primary, WellKnownRemoteHostServices.RemoteHostService, hostGroup, timeout, cancellationToken).ConfigureAwait(false);

        client = new ServiceHubRemoteHostClient(workspace, primary, hostGroup, remoteHostStream);

        // NOTE(review): the string returned by Connect is awaited but not inspected here - confirm
        // that validating it against "current" is intentionally no longer required.
        await client._rpc.InvokeWithCancellationAsync<string>(
            nameof(IRemoteHostService.Connect),
            new object[] { current, TelemetryService.DefaultSession.SerializeSettings() },
            cancellationToken).ConfigureAwait(false);

        return client;
    }
    catch (Exception ex)
    {
        if (client != null)
        {
            // make sure we shutdown client if initializing client has failed.
            client.Shutdown();
        }
        else
        {
            // the client was never constructed, so nothing else holds the stream;
            // dispose it here so that the connection is not leaked.
            remoteHostStream?.Dispose();
        }

        // translate to our own cancellation if it is raised.
        cancellationToken.ThrowIfCancellationRequested();

        // otherwise, report watson and throw original exception
        WatsonReporter.Report("ServiceHub creation failed", ex, ReportDetailInfo);
        throw;
    }
}
95
+
68
96
private static async Task RegisterWorkspaceHostAsync ( Workspace workspace , RemoteHostClient client )
69
97
{
70
98
var vsWorkspace = workspace as VisualStudioWorkspaceImpl ;
@@ -88,7 +116,7 @@ await Task.Factory.SafeStartNew(() =>
88
116
89
117
private ServiceHubRemoteHostClient (
90
118
Workspace workspace , HubClient hubClient , HostGroup hostGroup , Stream stream ) :
91
- base ( workspace )
119
+ base ( workspace )
92
120
{
93
121
_hubClient = hubClient ;
94
122
_hostGroup = hostGroup ;
@@ -136,6 +164,40 @@ private void OnRpcDisconnected(object sender, JsonRpcDisconnectedEventArgs e)
136
164
Disconnected ( ) ;
137
165
}
138
166
167
/// <summary>
/// call <paramref name="funcAsync"/> and retry up to <paramref name="timeout"/> if the call throws
/// <typeparamref name="TException"/>. any other exception from the call won't be handled here.
/// </summary>
/// <typeparam name="TException">exception type that is considered transient and triggers a retry.</typeparam>
/// <typeparam name="TResult">result type of the remote call.</typeparam>
/// <param name="funcAsync">the call to attempt; invoked once per try.</param>
/// <param name="timeout">
/// total time budget for starting new attempts. if it is zero or negative,
/// <paramref name="funcAsync"/> is never invoked and the method times out immediately.
/// </param>
/// <param name="cancellationToken">checked before each attempt, after each failed attempt and during the retry delay.</param>
/// <returns>the result of the first attempt that completes without throwing.</returns>
/// <exception cref="OperationCanceledException"><paramref name="cancellationToken"/> was cancelled.</exception>
/// <exception cref="TimeoutException">
/// no attempt succeeded within <paramref name="timeout"/>. the last <typeparamref name="TException"/>
/// that was swallowed (if any) is available as <see cref="Exception.InnerException"/>.
/// </exception>
private static async Task<TResult> RetryRemoteCallAsync<TException, TResult>(
    Func<Task<TResult>> funcAsync,
    TimeSpan timeout,
    CancellationToken cancellationToken) where TException : Exception
{
    const int retryDelayInMS = 50;

    // remember the most recent transient failure so that a timeout can say why it happened.
    Exception lastFailure = null;

    // use a monotonic clock; wall clock time (DateTime.UtcNow) can jump when the system clock is adjusted.
    var stopwatch = Stopwatch.StartNew();
    while (stopwatch.Elapsed < timeout)
    {
        cancellationToken.ThrowIfCancellationRequested();

        try
        {
            return await funcAsync().ConfigureAwait(false);
        }
        catch (TException ex)
        {
            // throw cancellation token if operation is cancelled
            cancellationToken.ThrowIfCancellationRequested();

            lastFailure = ex;
        }

        // wait for retryDelayInMS before next try
        await Task.Delay(retryDelayInMS, cancellationToken).ConfigureAwait(false);
    }

    // operation timed out, more than we are willing to wait
    throw new TimeoutException($"remote call timed out after retrying for {timeout}", lastFailure);
}
200
+
139
201
private static async Task < Stream > RequestServiceAsync (
140
202
HubClient client ,
141
203
string serviceName ,
@@ -156,7 +218,17 @@ private static async Task<Stream> RequestServiceAsync(
156
218
{
157
219
try
158
220
{
159
- return await RequestServiceAsync ( client , descriptor , timeout , cancellationToken ) . ConfigureAwait ( false ) ;
221
+ // we are wrapping HubClient.RequestServiceAsync since we can't control its internal timeout value ourselves.
222
+ // we have bug opened to track the issue.
223
+ // https://devdiv.visualstudio.com/DefaultCollection/DevDiv/Editor/_workitems?id=378757&fullScreen=false&_a=edit
224
+
225
+ // retry on cancellation token since HubClient will throw its own cancellation token
226
+ // when it couldn't connect to service hub service for some reasons
227
+ // (ex, OOP process GC blocked and not responding to request)
228
+ return await RetryRemoteCallAsync < OperationCanceledException , Stream > (
229
+ ( ) => client . RequestServiceAsync ( descriptor , cancellationToken ) ,
230
+ timeout ,
231
+ cancellationToken ) . ConfigureAwait ( false ) ;
160
232
}
161
233
catch ( RemoteInvocationException ex )
162
234
{
@@ -184,41 +256,6 @@ private static async Task<Stream> RequestServiceAsync(
184
256
throw ExceptionUtilities . Unreachable ;
185
257
}
186
258
187
- private static async Task < Stream > RequestServiceAsync ( HubClient client , ServiceDescriptor descriptor , TimeSpan timeout , CancellationToken cancellationToken = default ( CancellationToken ) )
188
- {
189
- // we are wrapping HubClient.RequestServiceAsync since we can't control its internal timeout value ourselves.
190
- // we have bug opened to track the issue.
191
- // https://devdiv.visualstudio.com/DefaultCollection/DevDiv/Editor/_workitems?id=378757&fullScreen=false&_a=edit
192
- const int retry_delayInMS = 50 ;
193
-
194
- var start = DateTime . UtcNow ;
195
- while ( start - DateTime . UtcNow < timeout )
196
- {
197
- cancellationToken . ThrowIfCancellationRequested ( ) ;
198
-
199
- try
200
- {
201
- return await client . RequestServiceAsync ( descriptor , cancellationToken ) . ConfigureAwait ( false ) ;
202
- }
203
- catch ( OperationCanceledException )
204
- {
205
- // if it is our own cancellation token, then rethrow
206
- // otherwise, let us retry.
207
- //
208
- // we do this since HubClient itself can throw its own cancellation token
209
- // when it couldn't connect to service hub service for some reasons
210
- // (ex, OOP process GC blocked and not responding to request)
211
- cancellationToken . ThrowIfCancellationRequested ( ) ;
212
- }
213
-
214
- // wait for retry_delayInMS before next try
215
- await Task . Delay ( retry_delayInMS , cancellationToken ) . ConfigureAwait ( false ) ;
216
- }
217
-
218
- // request service to HubClient timed out, more than we are willing to wait
219
- throw new TimeoutException ( "RequestServiceAsync timed out" ) ;
220
- }
221
-
222
259
private static int ReportDetailInfo ( IFaultUtility faultUtility )
223
260
{
224
261
// 0 means send watson, otherwise, cancel watson
0 commit comments