Skip to content

Commit 1f4a1d7

Browse files
authored
Add warm boot support with removed/created port (sonic-net#515)
* Add warm boot support with removed/created port * Fix aspell * Update sai_warmboot.bin location to local folder
1 parent 59e530a commit 1f4a1d7

16 files changed

+424
-22
lines changed

syncd/syncd.cpp

+27-5
Original file line numberDiff line numberDiff line change
@@ -1257,6 +1257,7 @@ void get_port_related_objects(
12571257

12581258
void post_port_remove(
12591259
_In_ std::shared_ptr<SaiSwitch> sw,
1260+
_In_ sai_object_id_t port_rid,
12601261
_In_ const std::vector<sai_object_id_t>& relatedRids)
12611262
{
12621263
SWSS_LOG_ENTER();
@@ -1307,16 +1308,16 @@ void post_port_remove(
13071308

13081309
sai_object_type_t ot = redis_sai_object_type_query(vid);
13091310

1310-
std::string key = sai_serialize_object_type(ot) + ":" + str_vid;
1311+
std::string key = ASIC_STATE_TABLE + std::string(":") + sai_serialize_object_type(ot) + ":" + str_vid;
13111312

13121313
SWSS_LOG_INFO("removing ASIC DB key: %s", key.c_str());
13131314

13141315
g_redisClient->del(key);
13151316
}
13161317

1317-
SWSS_LOG_NOTICE("post port remove actions succeeded");
1318+
sw->onPostPortRemove(port_rid);
13181319

1319-
// TODO lane map must be updated (for warm boot)
1320+
SWSS_LOG_NOTICE("post port remove actions succeeded");
13201321
}
13211322

13221323
void post_port_create(
@@ -1328,7 +1329,7 @@ void post_port_create(
13281329

13291330
sw->onPostPortCreate(port_rid, port_vid);
13301331

1331-
// TODO lane map must be updated (for warm boot)
1332+
SWSS_LOG_NOTICE("post port create actions succeeded");
13321333
}
13331334

13341335
sai_status_t handle_generic(
@@ -1560,7 +1561,7 @@ sai_status_t handle_generic(
15601561

15611562
if (object_type == SAI_OBJECT_TYPE_PORT)
15621563
{
1563-
post_port_remove(switches.at(switch_vid), related);
1564+
post_port_remove(switches.at(switch_vid), rid, related);
15641565
}
15651566
}
15661567
}
@@ -2418,6 +2419,16 @@ sai_status_t processEventInInitViewMode(
24182419
{
24192420
case SAI_COMMON_API_CREATE:
24202421

2422+
if (object_type == SAI_OBJECT_TYPE_PORT)
2423+
{
2424+
// reason for this is that if user will create port,
2425+
// new port is not actually created so when for example
2426+
// querying new queues for the newly created port, they are
2427+
// not there, since no actual port create was issued on
2428+
// the ASIC
2429+
SWSS_LOG_THROW("port object can't be created in init view mode");
2430+
}
2431+
24212432
if (info->isnonobjectid)
24222433
{
24232434
/*
@@ -2450,6 +2461,17 @@ sai_status_t processEventInInitViewMode(
24502461

24512462
case SAI_COMMON_API_REMOVE:
24522463

2464+
if (object_type == SAI_OBJECT_TYPE_PORT)
2465+
{
2466+
// reason for this is that if user will remove port, actual
2467+
// resources for it won't be released, lanes would still be
2468+
// occupied and there is extra logic required in post port
2469+
// remove which clears OIDs (ipgs,queues,SGs) from redis db
2470+
// that are automatically removed by vendor SAI, and comparison
2471+
// logic doesn't support that
2472+
SWSS_LOG_THROW("port object can't be removed in init view mode");
2473+
}
2474+
24532475
if (object_type == SAI_OBJECT_TYPE_SWITCH)
24542476
{
24552477
/*

syncd/syncd_applyview.cpp

+40-1
Original file line numberDiff line numberDiff line change
@@ -6985,6 +6985,7 @@ void populateExistingObjects(
69856985
auto sw = switches.begin()->second;
69866986

69876987
auto coldBootDiscoveredVids = sw->getColdBootDiscoveredVids();
6988+
auto warmBootDiscoveredVids = sw->getWarmBootDiscoveredVids();
69886989

69896990
/*
69906991
* If some objects that are existing objects on switch are not present in
@@ -7058,9 +7059,47 @@ void populateExistingObjects(
70587059
* NOTE: If we are here, then this RID exists only in current view, and
70597060
* if this object contains any OID attributes, discovery logic queried
70607061
* them so they are also existing in current view.
7062+
*
7063+
* Also in warm boot, when user removed port, and then created some new
7064+
* ports, new QUEUEs, IPGs and SGs will be created automatically by
7065+
* SAI. Those newly created objects most likely will have different RID
7066+
* values than previous instances for given port. Those values should
7067+
* also be copied to temporary view, since they will not exist on cold
7068+
* boot discovered VIDs. If not, then comparison logic will try to remove
7069+
* them which is not what we want.
7070+
*
7071+
* This is tricky scenario, and there could be some issues also when
7072+
* other object types would be created by user.
70617073
*/
70627074

7063-
if (coldBootDiscoveredVids.find(vid) == coldBootDiscoveredVids.end())
7075+
bool performColdCheck = true;
7076+
7077+
if (warmBootDiscoveredVids.find(vid) != warmBootDiscoveredVids.end())
7078+
{
7079+
sai_object_type_t ot = redis_sai_object_type_query(vid);
7080+
7081+
switch (ot)
7082+
{
7083+
case SAI_OBJECT_TYPE_QUEUE:
7084+
case SAI_OBJECT_TYPE_INGRESS_PRIORITY_GROUP:
7085+
case SAI_OBJECT_TYPE_SCHEDULER_GROUP:
7086+
7087+
// TODO this case may require adjustment, if user will do a
7088+
// warm boot then remove/add some ports and make another
7089+
// warm boot, it may happen that current logic will be
7090+
// confused which of those objects are from previous warm
7091+
// boot or second one, need better way to mark changes to
7092+
// those objects in redis DB between warm boots
7093+
7094+
performColdCheck = false;
7095+
7096+
break;
7097+
default:
7098+
break;
7099+
}
7100+
}
7101+
7102+
if (performColdCheck && coldBootDiscoveredVids.find(vid) == coldBootDiscoveredVids.end())
70647103
{
70657104
SWSS_LOG_INFO("object is not on default existing list: %s RID %s VID %s",
70667105
sai_serialize_object_type(sai_object_type_query(rid)).c_str(),

syncd/syncd_hard_reinit.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -1349,7 +1349,7 @@ void performWarmRestart()
13491349
* Perform all get operations on existing switch.
13501350
*/
13511351

1352-
auto sw = switches[switch_vid] = std::make_shared<SaiSwitch>(switch_vid, switch_rid);
1352+
auto sw = switches[switch_vid] = std::make_shared<SaiSwitch>(switch_vid, switch_rid, true);
13531353

13541354
g_switch_rid = switch_rid;
13551355
g_switch_vid = switch_vid;

syncd/syncd_saiswitch.cpp

+133-6
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77
#include <unordered_map>
88
#include <set>
99

10+
/*
 * Number of entries reserved in the buffer handed to SAI when querying
 * SAI_PORT_ATTR_HW_LANE_LIST for a single port.
 */
constexpr int maxLanesPerPort = 8;
11+
1012
/*
1113
* NOTE: all those methods could be implemented inside SaiSwitch class so then
1214
* we could skip using switch_id in params and even they could be public then.
@@ -148,18 +150,16 @@ std::unordered_map<sai_uint32_t, sai_object_id_t> SaiSwitch::saiGetHardwareLaneM
148150
* addressed in future.
149151
*/
150152

151-
const int lanesPerPort = 8;
152-
153153
for (const auto &port_rid : portList)
154154
{
155-
sai_uint32_t lanes[lanesPerPort];
155+
sai_uint32_t lanes[maxLanesPerPort];
156156

157157
memset(lanes, 0, sizeof(lanes));
158158

159159
sai_attribute_t attr;
160160

161161
attr.id = SAI_PORT_ATTR_HW_LANE_LIST;
162-
attr.value.u32list.count = lanesPerPort;
162+
attr.value.u32list.count = maxLanesPerPort;
163163
attr.value.u32list.list = lanes;
164164

165165
sai_status_t status = sai_metadata_sai_port_api->get_port_attribute(port_rid, 1, &attr);
@@ -1059,6 +1059,13 @@ std::set<sai_object_id_t> SaiSwitch::getColdBootDiscoveredVids() const
10591059
return discoveredVids;
10601060
}
10611061

1062+
std::set<sai_object_id_t> SaiSwitch::getWarmBootDiscoveredVids() const
1063+
{
1064+
SWSS_LOG_ENTER();
1065+
1066+
return m_warmBootDiscoveredVids;
1067+
}
1068+
10621069
void SaiSwitch::redisSaveColdBootDiscoveredVids() const
10631070
{
10641071
SWSS_LOG_ENTER();
@@ -1224,6 +1231,21 @@ sai_object_id_t SaiSwitch::getDefaultValueForOidAttr(
12241231
return ita->second;
12251232
}
12261233

1234+
void SaiSwitch::helperPopulateWarmBootVids()
1235+
{
1236+
SWSS_LOG_ENTER();
1237+
1238+
if (!m_warmBoot)
1239+
return;
1240+
1241+
for (sai_object_id_t rid: m_discovered_rids)
1242+
{
1243+
sai_object_id_t vid = translate_rid_to_vid(rid, m_switch_vid);
1244+
1245+
m_warmBootDiscoveredVids.insert(vid);
1246+
}
1247+
}
1248+
12271249
/*
12281250
* NOTE: If real ID will change during hard restarts, then we need to remap all
12291251
* VID/RID, but we can only do that if we will save entire tree with all
@@ -1232,7 +1254,9 @@ sai_object_id_t SaiSwitch::getDefaultValueForOidAttr(
12321254

12331255
SaiSwitch::SaiSwitch(
12341256
_In_ sai_object_id_t switch_vid,
1235-
_In_ sai_object_id_t switch_rid)
1257+
_In_ sai_object_id_t switch_rid,
1258+
_In_ bool warmBoot):
1259+
m_warmBoot(warmBoot)
12361260
{
12371261
SWSS_LOG_ENTER();
12381262

@@ -1263,9 +1287,68 @@ SaiSwitch::SaiSwitch(
12631287

12641288
helperLoadColdVids();
12651289

1290+
helperPopulateWarmBootVids();
1291+
12661292
saiGetMacAddress(m_default_mac_address);
12671293
}
12681294

1295+
std::vector<uint32_t> SaiSwitch::saiGetPortLanes(
1296+
_In_ sai_object_id_t port_rid)
1297+
{
1298+
SWSS_LOG_ENTER();
1299+
1300+
std::vector<uint32_t> lanes;
1301+
1302+
lanes.resize(maxLanesPerPort);
1303+
1304+
sai_attribute_t attr;
1305+
1306+
attr.id = SAI_PORT_ATTR_HW_LANE_LIST;
1307+
attr.value.u32list.count = maxLanesPerPort;
1308+
attr.value.u32list.list = lanes.data();
1309+
1310+
sai_status_t status = sai_metadata_sai_port_api->get_port_attribute(port_rid, 1, &attr);
1311+
1312+
if (status != SAI_STATUS_SUCCESS)
1313+
{
1314+
SWSS_LOG_THROW("failed to get hardware lane list port RID %s: %s",
1315+
sai_serialize_object_id(port_rid).c_str(),
1316+
sai_serialize_status(status).c_str());
1317+
}
1318+
1319+
if (attr.value.u32list.count == 0)
1320+
{
1321+
SWSS_LOG_THROW("switch returned lane count ZERO for port RID %s",
1322+
sai_serialize_object_id(port_rid).c_str());
1323+
}
1324+
1325+
lanes.resize(attr.value.u32list.count);
1326+
1327+
return lanes;
1328+
}
1329+
1330+
/*
 * Reads the hardware lanes of the given port RID via saiGetPortLanes() and
 * stores one "lane -> port RID" field per lane in the redis hash named by
 * getRedisLanesKey().
 */
void SaiSwitch::redisUpdatePortLaneMap(
        _In_ sai_object_id_t port_rid)
{
    SWSS_LOG_ENTER();

    const auto lanes = saiGetPortLanes(port_rid);

    // Neither the hash key nor the serialized port RID depends on the lane,
    // so build them once instead of once per lane.
    const auto key = getRedisLanesKey();
    const std::string strPortId = sai_serialize_object_id(port_rid);

    for (const uint32_t lane: lanes)
    {
        const std::string strLane = sai_serialize_number(lane);

        g_redisClient->hset(key, strLane, strPortId);
    }

    SWSS_LOG_NOTICE("added %zu lanes to redis lane map for port RID %s",
            lanes.size(),
            strPortId.c_str());
}
1351+
12691352
void SaiSwitch::onPostPortCreate(
12701353
_In_ sai_object_id_t port_rid,
12711354
_In_ sai_object_id_t port_vid)
@@ -1282,7 +1365,8 @@ void SaiSwitch::onPostPortCreate(
12821365

12831366
m_discovered_rids.insert(discovered.begin(), discovered.end());
12841367

1285-
SWSS_LOG_NOTICE("putting ALL new discovered objects to redis");
1368+
SWSS_LOG_NOTICE("putting ALL new discovered objects to redis for port %s",
1369+
sai_serialize_object_id(port_vid).c_str());
12861370

12871371
for (sai_object_id_t rid: discovered)
12881372
{
@@ -1296,5 +1380,48 @@ void SaiSwitch::onPostPortCreate(
12961380

12971381
redisSetDummyAsicStateForRealObjectId(rid);
12981382
}
1383+
1384+
redisUpdatePortLaneMap(port_rid);
12991385
}
13001386

1387+
/*
 * Deletes from the redis lane map every lane whose mapped value equals the
 * given port RID.
 *
 * If no lane was mapped to the port, the condition is raised through
 * SWSS_LOG_THROW after the summary notice has been logged.
 */
void SaiSwitch::onPostPortRemove(
        _In_ sai_object_id_t port_rid)
{
    SWSS_LOG_ENTER();

    // Unsigned so the argument type matches the %u conversion used below.
    unsigned int removed = 0;

    // key - lane number, value - port RID
    const auto map = redisGetLaneMap();

    // The hash key is the same for every lane, so fetch it once.
    const auto key = getRedisLanesKey();

    for (const auto& kv: map)
    {
        if (kv.second != port_rid)
        {
            continue;
        }

        const std::string strLane = sai_serialize_number(kv.first);

        g_redisClient->hdel(key, strLane);

        removed++;
    }

    const std::string strPortId = sai_serialize_object_id(port_rid);

    SWSS_LOG_NOTICE("removed %u lanes from redis lane map for port RID %s",
            removed,
            strPortId.c_str());

    if (removed == 0)
    {
        SWSS_LOG_THROW("NO LANES found in redis lane map for given port RID %s",
                strPortId.c_str());
    }
}
1421+
1422+
bool SaiSwitch::isWarmBoot() const
1423+
{
1424+
SWSS_LOG_ENTER();
1425+
1426+
return m_warmBoot;
1427+
}

0 commit comments

Comments
 (0)