@@ -253,6 +253,17 @@ def __init__(self):
253
253
self .kvm_test = True
254
254
else :
255
255
self .kvm_test = False
256
+ if "service-warm-restart" in self .test_params ['reboot_type' ]:
257
+ self .check_param ('service_list' , None , required = True )
258
+ self .check_param ('service_data' , None , required = True )
259
+ self .service_data = self .test_params ['service_data' ]
260
+ for service_name in self .test_params ['service_list' ]:
261
+ cmd = 'systemctl show -p ExecMainStartTimestamp {}' .format (service_name )
262
+ stdout , _ , _ = self .dut_connection .execCommand (cmd )
263
+ if service_name not in self .service_data :
264
+ self .service_data [service_name ] = {}
265
+ self .service_data [service_name ]['service_start_time' ] = str (stdout [0 ]).strip ()
266
+ self .log ("Service start time for {} is {}" .format (service_name , self .service_data [service_name ]['service_start_time' ]))
256
267
return
257
268
258
269
def read_json (self , name ):
@@ -437,7 +448,7 @@ def build_vlan_if_port_mapping(self):
437
448
portchannel_names = [pc ['name' ] for pc in portchannel_content .values ()]
438
449
439
450
vlan_content = self .read_json ('vlan_ports_file' )
440
-
451
+
441
452
vlan_if_port = []
442
453
for vlan in self .vlan_ip_range :
443
454
for ifname in vlan_content [vlan ]['members' ]:
@@ -926,6 +937,31 @@ def wait_until_control_plane_up(self):
926
937
self .no_control_stop = datetime .datetime .now ()
927
938
self .log ("Dut reboots: control plane up at %s" % str (self .no_control_stop ))
928
939
940
def wait_until_service_restart(self, timeout=120, poll_interval=10):
    """Block until every service in ``test_params['service_list']`` has restarted.

    A service counts as restarted once the first output line of
    ``systemctl show -p ExecMainStartTimestamp <service>`` differs from the
    value stored in ``self.service_data[<service>]['service_start_time']``.
    ``self.reboot_start`` is set to the time this method was entered.

    Args:
        timeout: total polling budget in seconds (default 120).
        poll_interval: seconds slept between polling rounds (default 10).

    Raises:
        TimeoutError: when at least one service still reports its old start
            timestamp once the budget is used up. A message naming those
            services is added to ``self.fails['dut']`` before raising.
    """
    self.log("Wait until service restart")
    self.reboot_start = datetime.datetime.now()
    pending = set(self.test_params['service_list'])
    remaining = timeout
    while remaining > 0:
        for service_name in self.test_params['service_list']:
            if service_name not in pending:
                continue
            cmd = 'systemctl show -p ExecMainStartTimestamp {}'.format(service_name)
            stdout, _, _ = self.dut_connection.execCommand(cmd)
            if not stdout:
                # Nothing to compare against in this round; poll again later
                # instead of failing on stdout[0].
                continue
            if self.service_data[service_name]['service_start_time'] != str(stdout[0]).strip():
                pending.remove(service_name)
        if not pending:
            break
        remaining -= poll_interval
        time.sleep(poll_interval)

    if pending:
        # Report the configured budget: the countdown itself is already
        # exhausted at this point and would always read as zero.
        self.fails['dut'].add("Container {} hasn't come back up in {} seconds".format(','.join(sorted(pending)), timeout))
        raise TimeoutError

    # TODO: add timestamp
    self.log("Service has restarted")
964
+
929
965
def handle_fast_reboot_health_check (self ):
930
966
self .log ("Check that device is still forwarding data plane traffic" )
931
967
self .fails ['dut' ].add ("Data plane has a forwarding problem after CPU went down" )
@@ -1017,6 +1053,10 @@ def wait_for_ssh_threads(signal):
1017
1053
# verify there are no interface flaps after warm boot
1018
1054
self .neigh_lag_status_check ()
1019
1055
1056
+ if 'service-warm-restart' == self .reboot_type :
1057
+ # verify there are no interface flaps after service warm restart
1058
+ self .neigh_lag_status_check ()
1059
+
1020
1060
def handle_advanced_reboot_health_check_kvm (self ):
1021
1061
self .log ("Wait until data plane stops" )
1022
1062
forward_stop_signal = multiprocessing .Event ()
@@ -1193,8 +1233,11 @@ def runTest(self):
1193
1233
thr = threading .Thread (target = self .reboot_dut )
1194
1234
thr .setDaemon (True )
1195
1235
thr .start ()
1196
- self .wait_until_control_plane_down ()
1197
- self .no_control_start = self .cpu_state .get_state_time ('down' )
1236
+ if self .reboot_type != 'service-warm-restart' :
1237
+ self .wait_until_control_plane_down ()
1238
+ self .no_control_start = self .cpu_state .get_state_time ('down' )
1239
+ else :
1240
+ self .wait_until_service_restart ()
1198
1241
1199
1242
if 'warm-reboot' in self .reboot_type :
1200
1243
finalizer_timeout = 60 + self .test_params ['reboot_limit_in_seconds' ]
@@ -1210,7 +1253,7 @@ def runTest(self):
1210
1253
else :
1211
1254
if self .reboot_type == 'fast-reboot' :
1212
1255
self .handle_fast_reboot_health_check ()
1213
- if 'warm-reboot' in self .reboot_type :
1256
+ if 'warm-reboot' in self .reboot_type or 'service-warm-restart' == self . reboot_type :
1214
1257
self .handle_warm_reboot_health_check ()
1215
1258
self .handle_post_reboot_health_check ()
1216
1259
@@ -1276,15 +1319,20 @@ def reboot_dut(self):
1276
1319
time .sleep (self .reboot_delay )
1277
1320
1278
1321
if not self .kvm_test and \
1279
- (self .reboot_type == 'fast-reboot' or 'warm-reboot' in self .reboot_type ):
1322
+ (self .reboot_type == 'fast-reboot' or 'warm-reboot' in self .reboot_type or 'service-warm-restart' in self . reboot_type ):
1280
1323
self .sender_thr = threading .Thread (target = self .send_in_background )
1281
1324
self .sniff_thr = threading .Thread (target = self .sniff_in_background )
1282
1325
self .sniffer_started = threading .Event () # Event for the sniff_in_background status.
1283
1326
self .sniff_thr .start ()
1284
1327
self .sender_thr .start ()
1285
1328
1286
1329
self .log ("Rebooting remote side" )
1287
- stdout , stderr , return_code = self .dut_connection .execCommand ("sudo " + self .reboot_type , timeout = 30 )
1330
+ if self .reboot_type != 'service-warm-restart' :
1331
+ stdout , stderr , return_code = self .dut_connection .execCommand ("sudo " + self .reboot_type , timeout = 30 )
1332
+ else :
1333
+ self .restart_service ()
1334
+ return
1335
+
1288
1336
if stdout != []:
1289
1337
self .log ("stdout from %s: %s" % (self .reboot_type , str (stdout )))
1290
1338
if stderr != []:
@@ -1300,6 +1348,42 @@ def reboot_dut(self):
1300
1348
1301
1349
return
1302
1350
1351
def restart_service(self):
    """Warm-restart every service listed in ``test_params['service_list']``.

    For a service whose ``service_data`` entry has an ``image_path_on_dut``
    key, the restart is done through ``sonic-installer upgrade-docker ...
    --warm``. Otherwise warm restart is enabled for the service,
    ``pre_service_warm_restart`` is run, and the service is restarted with
    ``service <name> restart``.

    Any stderr output is logged, recorded in ``self.fails['dut']``, the main
    thread is interrupted and an ``Exception`` is raised. A return code other
    than 0 or 255 interrupts the main thread without raising.
    """
    for service_name in self.test_params['service_list']:
        if 'image_path_on_dut' not in self.service_data[service_name]:
            # Plain warm restart of the already installed container
            self.dut_connection.execCommand('sudo config warm_restart enable {}'.format(service_name))
            self.pre_service_warm_restart(service_name)
            restart_cmd = 'sudo service {} restart'.format(service_name)
            stdout, stderr, return_code = self.dut_connection.execCommand(restart_cmd)
        else:
            # Warm upgrade from the image path given in service_data
            image_path = self.service_data[service_name]['image_path_on_dut']
            upgrade_cmd = "sudo sonic-installer upgrade-docker {} {} -y --warm".format(service_name, image_path)
            stdout, stderr, return_code = self.dut_connection.execCommand(upgrade_cmd, timeout=30)

        if stdout != []:
            self.log("stdout from %s %s: %s" % (self.reboot_type, service_name, str(stdout)))

        if stderr != []:
            self.log("stderr from %s %s: %s" % (self.reboot_type, service_name, str(stderr)))
            failure = "service warm restart {} failed with error {}".format(service_name, stderr)
            self.fails['dut'].add(failure)
            thread.interrupt_main()
            raise Exception("{} failed with error {}".format(self.reboot_type, stderr))

        self.log("return code from %s %s: %s" % (self.reboot_type, service_name, str(return_code)))
        if return_code not in (0, 255):
            thread.interrupt_main()
1370
+
1371
def pre_service_warm_restart(self, service_name):
    """Run the container-specific step that precedes a service warm restart.

    Copied from src/sonic-utilities/sonic_installer/main.py. Only 'swss',
    'bgp' and 'teamd' get special handling; any other service name is a
    no-op.

    Args:
        service_name: name of the service about to be restarted.
    """
    if service_name == 'teamd':
        self.dut_connection.execCommand('docker exec -i teamd pkill -USR1 teamd > /dev/null')
        return

    if service_name == 'bgp':
        # zebra first, then bgpd
        for kill_cmd in ('docker exec -i bgp pkill -9 zebra',
                         'docker exec -i bgp pkill -9 bgpd'):
            self.dut_connection.execCommand(kill_cmd)
        return

    if service_name != 'swss':
        return

    check_cmd = 'docker exec -i swss orchagent_restart_check -w 2000 -r 5'
    out, err, rc = self.dut_connection.execCommand(check_cmd)
    if rc == 0:
        return
    # A non-zero result is only logged; the caller still goes on to restart.
    self.log('stdout from {}: {}'.format(check_cmd, str(out)))
    self.log('stderr from {}: {}'.format(check_cmd, str(err)))
    self.log('orchagent is not in clean state, RESTARTCHECK failed: {}'.format(rc))
1386
+
1303
1387
def cmd (self , cmds ):
1304
1388
process = subprocess .Popen (cmds ,
1305
1389
shell = False ,
@@ -1325,7 +1409,7 @@ def peer_state_check(self, ip, queue):
1325
1409
lacp_pdu_down_times and len (lacp_pdu_down_times ) > 0 else None
1326
1410
lacp_pdu_after_reboot = float (lacp_pdu_up_times [0 ]) if \
1327
1411
lacp_pdu_up_times and len (lacp_pdu_up_times ) > 0 else None
1328
- if 'warm-reboot' in self .reboot_type and lacp_pdu_before_reboot and lacp_pdu_after_reboot :
1412
+ if ( 'warm-reboot' in self .reboot_type or 'service-warm-restart' in self . reboot_type ) and lacp_pdu_before_reboot and lacp_pdu_after_reboot :
1329
1413
lacp_time_diff = lacp_pdu_after_reboot - lacp_pdu_before_reboot
1330
1414
if lacp_time_diff >= 90 and not self .kvm_test :
1331
1415
self .fails ['dut' ].add ("LACP session likely terminated by neighbor ({})" .format (ip ) + \
0 commit comments