@@ -60,7 +60,6 @@ func (s) TestResourceUpdateMetrics(t *testing.T) {
60
60
if err != nil {
61
61
t .Fatalf ("net.Listen() failed: %v" , err )
62
62
}
63
-
64
63
mgmtServer := e2e .StartManagementServer (t , e2e.ManagementServerOptions {Listener : l })
65
64
const listenerResourceName = "test-listener-resource"
66
65
const routeConfigurationName = "test-route-configuration-resource"
@@ -147,3 +146,170 @@ func (s) TestResourceUpdateMetrics(t *testing.T) {
147
146
t .Fatalf ("Unexpected data for metric \" grpc.xds_client.resource_updates_invalid\" , got: %v, want: %v" , got , 1 )
148
147
}
149
148
}
149
+
150
+ // TestServerFailureMetrics_BeforeResponseRecv configures an xDS client, and a
151
+ // management server. It then register a watcher and stops the management
152
+ // server before sending a resource update, and verifies that the expected
153
+ // metrics for server failure are emitted.
154
+ func (s ) TestServerFailureMetrics_BeforeResponseRecv (t * testing.T ) {
155
+ ctx , cancel := context .WithTimeout (context .Background (), defaultTestTimeout )
156
+ defer cancel ()
157
+
158
+ tmr := stats .NewTestMetricsRecorder ()
159
+ l , err := testutils .LocalTCPListener ()
160
+ if err != nil {
161
+ t .Fatalf ("net.Listen() failed: %v" , err )
162
+ }
163
+ mgmtServer := e2e .StartManagementServer (t , e2e.ManagementServerOptions {Listener : l })
164
+ nodeID := uuid .New ().String ()
165
+
166
+ bootstrapContents , err := bootstrap .NewContentsForTesting (bootstrap.ConfigOptionsForTesting {
167
+ Servers : []byte (fmt .Sprintf (`[{
168
+ "server_uri": %q,
169
+ "channel_creds": [{"type": "insecure"}]
170
+ }]` , mgmtServer .Address )),
171
+ Node : []byte (fmt .Sprintf (`{"id": "%s"}` , nodeID )),
172
+ Authorities : map [string ]json.RawMessage {
173
+ "authority" : []byte ("{}" ),
174
+ },
175
+ })
176
+ if err != nil {
177
+ t .Fatalf ("Failed to create bootstrap configuration: %v" , err )
178
+ }
179
+
180
+ config , err := bootstrap .NewConfigFromContents (bootstrapContents )
181
+ if err != nil {
182
+ t .Fatalf ("Failed to parse bootstrap contents: %s, %v" , string (bootstrapContents ), err )
183
+ }
184
+ pool := NewPool (config )
185
+ client , close , err := pool .NewClientForTesting (OptionsForTesting {
186
+ Name : t .Name (),
187
+ WatchExpiryTimeout : defaultTestWatchExpiryTimeout ,
188
+ MetricsRecorder : tmr ,
189
+ })
190
+ if err != nil {
191
+ t .Fatalf ("Failed to create an xDS client: %v" , err )
192
+ }
193
+ defer close ()
194
+
195
+ const listenerResourceName = "test-listener-resource"
196
+
197
+ // Watch for the listener on the above management server.
198
+ xdsresource .WatchListener (client , listenerResourceName , noopListenerWatcher {})
199
+
200
+ // Close the listener and ensure that the ADS stream breaks. This should
201
+ // cause a server failure count to emit eventually.
202
+ l .Close ()
203
+ select {
204
+ case <- ctx .Done ():
205
+ t .Fatal ("Timeout when waiting for ADS stream to close" )
206
+ default :
207
+ }
208
+
209
+ mdWant := stats.MetricsData {
210
+ Handle : xdsClientServerFailureMetric .Descriptor (),
211
+ IntIncr : 1 ,
212
+ LabelKeys : []string {"grpc.target" , "grpc.xds.server" },
213
+ LabelVals : []string {"Test/ServerFailureMetrics_BeforeResponseRecv" , mgmtServer .Address },
214
+ }
215
+ if err := tmr .WaitForInt64Count (ctx , mdWant ); err != nil {
216
+ t .Fatal (err .Error ())
217
+ }
218
+ }
219
+
220
+ // TestServerFailureMetrics_AfterResponseRecv configures an xDS client, and a
221
+ // management server to send a valid LDS updates, and verifies that the
222
+ // server failure metric is not emitted. It then closes the management server
223
+ // listener to close the ADS stream and verifies that the server failure metric
224
+ // is still not emitted because the the ADS stream was closed after having
225
+ // received a response on the stream.
226
+ func (s ) TestServerFailureMetrics_AfterResponseRecv (t * testing.T ) {
227
+ ctx , cancel := context .WithTimeout (context .Background (), defaultTestTimeout )
228
+ defer cancel ()
229
+
230
+ tmr := stats .NewTestMetricsRecorder ()
231
+ l , err := testutils .LocalTCPListener ()
232
+ if err != nil {
233
+ t .Fatalf ("net.Listen() failed: %v" , err )
234
+ }
235
+ lis := testutils .NewRestartableListener (l )
236
+ mgmtServer := e2e .StartManagementServer (t , e2e.ManagementServerOptions {Listener : lis })
237
+ const listenerResourceName = "test-listener-resource"
238
+ const routeConfigurationName = "test-route-configuration-resource"
239
+ nodeID := uuid .New ().String ()
240
+ resources := e2e.UpdateOptions {
241
+ NodeID : nodeID ,
242
+ Listeners : []* v3listenerpb.Listener {e2e .DefaultClientListener (listenerResourceName , routeConfigurationName )},
243
+ SkipValidation : true ,
244
+ }
245
+ if err := mgmtServer .Update (ctx , resources ); err != nil {
246
+ t .Fatalf ("Failed to update management server with resources: %v, err: %v" , resources , err )
247
+ }
248
+
249
+ bootstrapContents , err := bootstrap .NewContentsForTesting (bootstrap.ConfigOptionsForTesting {
250
+ Servers : []byte (fmt .Sprintf (`[{
251
+ "server_uri": %q,
252
+ "channel_creds": [{"type": "insecure"}]
253
+ }]` , mgmtServer .Address )),
254
+ Node : []byte (fmt .Sprintf (`{"id": "%s"}` , nodeID )),
255
+ Authorities : map [string ]json.RawMessage {
256
+ "authority" : []byte ("{}" ),
257
+ },
258
+ })
259
+ if err != nil {
260
+ t .Fatalf ("Failed to create bootstrap configuration: %v" , err )
261
+ }
262
+
263
+ config , err := bootstrap .NewConfigFromContents (bootstrapContents )
264
+ if err != nil {
265
+ t .Fatalf ("Failed to parse bootstrap contents: %s, %v" , string (bootstrapContents ), err )
266
+ }
267
+ pool := NewPool (config )
268
+ client , close , err := pool .NewClientForTesting (OptionsForTesting {
269
+ Name : t .Name (),
270
+ MetricsRecorder : tmr ,
271
+ })
272
+ if err != nil {
273
+ t .Fatalf ("Failed to create an xDS client: %v" , err )
274
+ }
275
+ defer close ()
276
+
277
+ // Watch the valid listener configured on the management server. This should
278
+ // cause a resource updates valid count to emit eventually.
279
+ xdsresource .WatchListener (client , listenerResourceName , noopListenerWatcher {})
280
+ mdWant := stats.MetricsData {
281
+ Handle : xdsClientResourceUpdatesValidMetric .Descriptor (),
282
+ IntIncr : 1 ,
283
+ LabelKeys : []string {"grpc.target" , "grpc.xds.server" , "grpc.xds.resource_type" },
284
+ LabelVals : []string {"Test/ServerFailureMetrics_AfterResponseRecv" , mgmtServer .Address , "ListenerResource" },
285
+ }
286
+ if err := tmr .WaitForInt64Count (ctx , mdWant ); err != nil {
287
+ t .Fatal (err .Error ())
288
+ }
289
+ // Server failure should have no recording point.
290
+ if got , _ := tmr .Metric ("grpc.xds_client.server_failure" ); got != 0 {
291
+ t .Fatalf ("Unexpected data for metric \" grpc.xds_client.server_failure\" , got: %v, want: %v" , got , 0 )
292
+ }
293
+
294
+ // Close the listener and ensure that the ADS stream breaks. This should
295
+ // cause a server failure count to emit eventually.
296
+ lis .Stop ()
297
+ select {
298
+ case <- ctx .Done ():
299
+ t .Fatal ("Timeout when waiting for ADS stream to close" )
300
+ default :
301
+ }
302
+ // Restart to prevent the attempt to create a new ADS stream after back off.
303
+ lis .Restart ()
304
+
305
+ mdWant = stats.MetricsData {
306
+ Handle : xdsClientServerFailureMetric .Descriptor (),
307
+ IntIncr : 1 ,
308
+ LabelKeys : []string {"grpc.target" , "grpc.xds.server" },
309
+ LabelVals : []string {"Test/ServerFailureMetrics_AfterResponseRecv" , mgmtServer .Address },
310
+ }
311
+ // Server failure should still have no recording point.
312
+ if err := tmr .WaitForInt64Count (ctx , mdWant ); err == nil {
313
+ t .Fatal ("tmr.WaitForInt64Count(ctx, mdWant) succeeded when expected to timeout." )
314
+ }
315
+ }
0 commit comments