Skip to content

Commit 11fc262

Browse files
authored
[syncd] add 2 stage shutdown support for warm reboot (sonic-net#391)
* [syncd] add support for warm boot pre-shutdown operation
  - issue warm pre-shutdown from syncd_request_shutdown
  - update warm shutdown state transitions in state database
  - stop notifications and counter polls before pre-shutdown
  Signed-off-by: Ying Xie <[email protected]>
* [syncd] log execution time of important operations
  Signed-off-by: Ying Xie <[email protected]>
* Replace do {} while (0) with {}
  Signed-off-by: Ying Xie <[email protected]>
* Expand operation 'PRE' to 'PRE-SHUTDOWN'
  Signed-off-by: Ying Xie <[email protected]>
* code style change
* [spell check test] allow word 'pre'
  Signed-off-by: Ying Xie <[email protected]>
1 parent 06418bb commit 11fc262

File tree

4 files changed

+107
-10
lines changed

4 files changed

+107
-10
lines changed

syncd/syncd.cpp

+84-6
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
#include <limits.h>
77

88
#include "swss/warm_restart.h"
9+
#include "swss/table.h"
910

1011
extern "C" {
1112
#include <sai.h>
@@ -1935,7 +1936,12 @@ void on_switch_create_in_init_view(
19351936

19361937
sai_object_id_t switch_rid;
19371938

1938-
sai_status_t status = sai_metadata_sai_switch_api->create_switch(&switch_rid, attr_count, attr_list);
1939+
sai_status_t status;
1940+
1941+
{
1942+
SWSS_LOG_TIMER("cold boot: create switch");
1943+
status = sai_metadata_sai_switch_api->create_switch(&switch_rid, attr_count, attr_list);
1944+
}
19391945

19401946
if (status != SAI_STATUS_SUCCESS)
19411947
{
@@ -3170,6 +3176,8 @@ typedef enum _syncd_restart_type_t
31703176

31713177
SYNCD_RESTART_TYPE_FAST,
31723178

3179+
SYNCD_RESTART_TYPE_PRE_SHUTDOWN,
3180+
31733181
} syncd_restart_type_t;
31743182

31753183
syncd_restart_type_t handleRestartQuery(swss::NotificationConsumer &restartQuery)
@@ -3202,6 +3210,12 @@ syncd_restart_type_t handleRestartQuery(swss::NotificationConsumer &restartQuery
32023210
return SYNCD_RESTART_TYPE_FAST;
32033211
}
32043212

3213+
if (op == "PRE-SHUTDOWN")
3214+
{
3215+
SWSS_LOG_NOTICE("received PRE_SHUTDOWN switch event");
3216+
return SYNCD_RESTART_TYPE_PRE_SHUTDOWN;
3217+
}
3218+
32053219
SWSS_LOG_WARN("received '%s' unknown switch shutdown event, assuming COLD", op.c_str());
32063220
return SYNCD_RESTART_TYPE_COLD;
32073221
}
@@ -3452,6 +3466,8 @@ int syncd_main(int argc, char **argv)
34523466
std::shared_ptr<swss::DBConnector> dbAsic = std::make_shared<swss::DBConnector>(ASIC_DB, swss::DBConnector::DEFAULT_UNIXSOCKET, 0);
34533467
std::shared_ptr<swss::DBConnector> dbNtf = std::make_shared<swss::DBConnector>(ASIC_DB, swss::DBConnector::DEFAULT_UNIXSOCKET, 0);
34543468
std::shared_ptr<swss::DBConnector> dbFlexCounter = std::make_shared<swss::DBConnector>(FLEX_COUNTER_DB, swss::DBConnector::DEFAULT_UNIXSOCKET, 0);
3469+
std::shared_ptr<swss::DBConnector> dbState = std::make_shared<swss::DBConnector>(STATE_DB, swss::DBConnector::DEFAULT_UNIXSOCKET, 0);
3470+
std::unique_ptr<swss::Table> warmRestartTable = std::unique_ptr<swss::Table>(new swss::Table(dbState.get(), STATE_WARM_RESTART_TABLE_NAME));
34553471

34563472
g_redisClient = std::make_shared<swss::RedisClient>(dbAsic.get());
34573473

@@ -3539,6 +3555,9 @@ int syncd_main(int argc, char **argv)
35393555

35403556
syncd_restart_type_t shutdownType = SYNCD_RESTART_TYPE_COLD;
35413557

3558+
sai_switch_api_t *sai_switch_api = NULL;
3559+
sai_api_query(SAI_API_SWITCH, (void**)&sai_switch_api);
3560+
35423561
try
35433562
{
35443563
SWSS_LOG_NOTICE("before onSyncdStart");
@@ -3575,7 +3594,55 @@ int syncd_main(int argc, char **argv)
35753594
*/
35763595

35773596
shutdownType = handleRestartQuery(*restartQuery);
3578-
break;
3597+
if (shutdownType != SYNCD_RESTART_TYPE_PRE_SHUTDOWN)
3598+
{
3599+
// break out of the event handling loop to shut down syncd
3600+
break;
3601+
}
3602+
3603+
// Handle switch pre-shutdown and wait for the final shutdown
3604+
// event
3605+
3606+
SWSS_LOG_TIMER("warm pre-shutdown");
3607+
3608+
FlexCounter::removeAllCounters();
3609+
stopNotificationsProcessingThread();
3610+
3611+
sai_attribute_t attr;
3612+
3613+
attr.id = SAI_SWITCH_ATTR_RESTART_WARM;
3614+
attr.value.booldata = true;
3615+
3616+
status = sai_switch_api->set_switch_attribute(gSwitchId, &attr);
3617+
3618+
if (status != SAI_STATUS_SUCCESS)
3619+
{
3620+
SWSS_LOG_ERROR("Failed to set SAI_SWITCH_ATTR_RESTART_WARM=true: %s for pre-shutdown",
3621+
sai_serialize_status(status).c_str());
3622+
shutdownType = SYNCD_RESTART_TYPE_COLD;
3623+
warmRestartTable->hset("warm-shutdown", "state", "set-flag-failed");
3624+
continue;
3625+
}
3626+
3627+
attr.id = SAI_SWITCH_ATTR_PRE_SHUTDOWN;
3628+
attr.value.booldata = true;
3629+
3630+
status = sai_switch_api->set_switch_attribute(gSwitchId, &attr);
3631+
if (status == SAI_STATUS_SUCCESS)
3632+
{
3633+
warmRestartTable->hset("warm-shutdown", "state", "pre-shutdown-succeeded");
3634+
}
3635+
else
3636+
{
3637+
SWSS_LOG_ERROR("Failed to set SAI_SWITCH_ATTR_PRE_SHUTDOWN=true: %s",
3638+
sai_serialize_status(status).c_str());
3639+
warmRestartTable->hset("warm-shutdown", "state", "pre-shutdown-failed");
3640+
3641+
// Restore cold shutdown.
3642+
attr.id = SAI_SWITCH_ATTR_RESTART_WARM;
3643+
attr.value.booldata = false;
3644+
status = sai_switch_api->set_switch_attribute(gSwitchId, &attr);
3645+
}
35793646
}
35803647
else if (sel == flexCounter.get())
35813648
{
@@ -3598,9 +3665,6 @@ int syncd_main(int argc, char **argv)
35983665
exit_and_notify(EXIT_FAILURE);
35993666
}
36003667

3601-
sai_switch_api_t *sai_switch_api = NULL;
3602-
sai_api_query(SAI_API_SWITCH, (void**)&sai_switch_api);
3603-
36043668
if (shutdownType == SYNCD_RESTART_TYPE_WARM)
36053669
{
36063670
const char *warmBootWriteFile = profile_get_value(0, SAI_KEY_WARM_BOOT_WRITE_FILE);
@@ -3612,6 +3676,7 @@ int syncd_main(int argc, char **argv)
36123676
SWSS_LOG_WARN("user requested warm shutdown but warmBootWriteFile is not specified, forcing cold shutdown");
36133677

36143678
shutdownType = SYNCD_RESTART_TYPE_COLD;
3679+
warmRestartTable->hset("warm-shutdown", "state", "warm-shutdown-failed");
36153680
}
36163681
else
36173682
{
@@ -3629,6 +3694,7 @@ int syncd_main(int argc, char **argv)
36293694
SWSS_LOG_ERROR("Failed to set SAI_SWITCH_ATTR_RESTART_WARM=true: %s, fall back to cold restart",
36303695
sai_serialize_status(status).c_str());
36313696
shutdownType = SYNCD_RESTART_TYPE_COLD;
3697+
warmRestartTable->hset("warm-shutdown", "state", "set-flag-failed");
36323698
}
36333699
}
36343700
}
@@ -3662,13 +3728,25 @@ int syncd_main(int argc, char **argv)
36623728
// Stop notification thread before removing switch
36633729
stopNotificationsProcessingThread();
36643730

3665-
status = sai_switch_api->remove_switch(gSwitchId);
3731+
{
3732+
SWSS_LOG_TIMER("remove switch");
3733+
status = sai_switch_api->remove_switch(gSwitchId);
3734+
}
3735+
36663736
if (status != SAI_STATUS_SUCCESS)
36673737
{
36683738
SWSS_LOG_NOTICE("Can't delete a switch. gSwitchId=0x%lx status=%s", gSwitchId,
36693739
sai_serialize_status(status).c_str());
36703740
}
36713741

3742+
if (shutdownType == SYNCD_RESTART_TYPE_WARM)
3743+
{
3744+
warmRestartTable->hset("warm-shutdown", "state",
3745+
(status == SAI_STATUS_SUCCESS) ?
3746+
"warm-shutdown-succeeded":
3747+
"warm-shutdown-failed");
3748+
}
3749+
36723750
SWSS_LOG_NOTICE("calling api uninitialize");
36733751

36743752
status = sai_api_uninitialize();

syncd/syncd_hard_reinit.cpp

+12-2
Original file line numberDiff line numberDiff line change
@@ -435,7 +435,12 @@ void processSwitches()
435435
SWSS_LOG_NOTICE("creating switch VID: %s",
436436
sai_serialize_object_id(switch_vid).c_str());
437437

438-
sai_status_t status = sai_metadata_sai_switch_api->create_switch(&switch_rid, attr_count, attr_list);
438+
sai_status_t status;
439+
440+
{
441+
SWSS_LOG_TIMER("Cold boot: create switch");
442+
status = sai_metadata_sai_switch_api->create_switch(&switch_rid, attr_count, attr_list);
443+
}
439444

440445
gSwitchId = switch_rid;
441446
SWSS_LOG_NOTICE("Initialize gSwitchId with ID = 0x%lx", gSwitchId);
@@ -1286,7 +1291,12 @@ void performWarmRestart()
12861291
switch_attrs[i+1].value.ptr = (void *)1; // any non-null pointer
12871292
}
12881293
check_notifications_pointers((uint32_t)NELMS(switch_attrs), &switch_attrs[0]);
1289-
sai_status_t status = sai_metadata_sai_switch_api->create_switch(&switch_rid, (uint32_t)NELMS(switch_attrs), &switch_attrs[0]);
1294+
sai_status_t status;
1295+
1296+
{
1297+
SWSS_LOG_TIMER("Warm boot: create switch");
1298+
status = sai_metadata_sai_switch_api->create_switch(&switch_rid, (uint32_t)NELMS(switch_attrs), &switch_attrs[0]);
1299+
}
12901300

12911301
if (status != SAI_STATUS_SUCCESS)
12921302
{

syncd/syncd_request_shutdown.cpp

+10-2
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,9 @@ int main(int argc, char **argv)
1616
static struct option long_options[] =
1717
{
1818
{ "cold", no_argument, 0, 'c' },
19-
{ "warm", no_argument, 0, 'w' }
19+
{ "warm", no_argument, 0, 'w' },
20+
{ "fast", no_argument, 0, 'f' },
21+
{ "pre", no_argument, 0, 'p' }, // Requesting pre shutdown
2022
};
2123

2224
std::string op;
@@ -26,7 +28,7 @@ int main(int argc, char **argv)
2628
{
2729
int option_index = 0;
2830

29-
int c = getopt_long(argc, argv, "cw", long_options, &option_index);
31+
int c = getopt_long(argc, argv, "cwfp", long_options, &option_index);
3032

3133
if (c == -1)
3234
break;
@@ -48,6 +50,11 @@ int main(int argc, char **argv)
4850
optionSpecified = true;
4951
break;
5052

53+
case 'p':
54+
op = "PRE-SHUTDOWN";
55+
optionSpecified = true;
56+
break;
57+
5158
default:
5259
SWSS_LOG_ERROR("getopt failure");
5360
exit(EXIT_FAILURE);
@@ -61,6 +68,7 @@ int main(int argc, char **argv)
6168
std::cerr << "Shutdown option must be specified" << std::endl;
6269
std::cerr << "---------------------------------" << std::endl;
6370
std::cerr << " --warm -w for warm restart" << std::endl;
71+
std::cerr << " --pre -p for warm pre-shutdown" << std::endl;
6472
std::cerr << " --cold -c for cold restart" << std::endl;
6573
std::cerr << " --fast -f for fast restart" << std::endl;
6674

tests/aspell.en.pws

+1
Original file line numberDiff line numberDiff line change
@@ -229,3 +229,4 @@ VXLAN
229229
workaroung
230230
xoff
231231
xon
232+
pre

0 commit comments

Comments
 (0)