Skip to content

Commit bfce363

Browse files
authored
Warm reboot for PortsOrch (sonic-net#551)
* Fix addExistingData consumer conversion * Add more addExistingData() * Warm reboot for PortsOrch * Remove calling doPortConfigDoneTask in ctor * Remove unused function signature
1 parent 674a5e6 commit bfce363

File tree

4 files changed

+145
-12
lines changed

4 files changed

+145
-12
lines changed

orchagent/orch.cpp

+70-4
Original file line numberDiff line numberDiff line change
@@ -66,13 +66,10 @@ vector<Selectable *> Orch::getSelectables()
6666
return selectables;
6767
}
6868

69-
void Consumer::execute()
69+
void Consumer::addToSync(std::deque<KeyOpFieldsValuesTuple> &entries)
7070
{
7171
SWSS_LOG_ENTER();
7272

73-
std::deque<KeyOpFieldsValuesTuple> entries;
74-
getConsumerTable()->pops(entries);
75-
7673
/* Nothing popped */
7774
if (entries.empty())
7875
{
@@ -123,6 +120,47 @@ void Consumer::execute()
123120
m_toSync[key] = KeyOpFieldsValuesTuple(key, op, existing_values);
124121
}
125122
}
123+
}
124+
125+
// TODO: Table should be const
126+
// Rebuild pending work from the current contents of `table`.
//
// Every key reported by table->getKeys() is read back with table->get() and
// wrapped in a SET_COMMAND tuple; keys for which get() returns false are
// skipped. The collected tuples are then passed to addToSync(). This function
// does not call drain() itself.
void Consumer::refillToSync(Table* table)
{
    vector<string> tableKeys;
    table->getKeys(tableKeys);

    std::deque<KeyOpFieldsValuesTuple> pending;
    for (const auto &tableKey : tableKeys)
    {
        KeyOpFieldsValuesTuple entry;
        kfvKey(entry) = tableKey;
        kfvOp(entry) = SET_COMMAND;

        // Only keep keys whose field/value pairs could actually be read.
        if (table->get(tableKey, kfvFieldsValues(entry)))
        {
            pending.push_back(entry);
        }
    }

    addToSync(pending);
}
147+
148+
void Consumer::refillToSync()
149+
{
150+
auto db = getConsumerTable()->getDbConnector();
151+
string tableName = getConsumerTable()->getTableName();
152+
auto table = Table(db, tableName);
153+
refillToSync(&table);
154+
}
155+
156+
void Consumer::execute()
157+
{
158+
SWSS_LOG_ENTER();
159+
160+
std::deque<KeyOpFieldsValuesTuple> entries;
161+
getConsumerTable()->pops(entries);
162+
163+
addToSync(entries);
126164

127165
drain();
128166
}
@@ -133,6 +171,34 @@ void Consumer::drain()
133171
m_orch->doTask(*this);
134172
}
135173

174+
bool Orch::addExistingData(const string& tableName)
175+
{
176+
Consumer* consumer = dynamic_cast<Consumer *>(getExecutor(tableName));
177+
if (consumer == NULL)
178+
{
179+
SWSS_LOG_ERROR("No consumer %s in Orch", tableName.c_str());
180+
return false;
181+
}
182+
183+
consumer->refillToSync();
184+
return true;
185+
}
186+
187+
// TODO: Table should be const
188+
// Queue the data already present in `table` on the consumer registered under
// the table's own name (table->getTableName()).
//
// Returns true after the consumer has been refilled from `table`. Returns
// false (and logs an error) when no Consumer is found for that name.
bool Orch::addExistingData(Table *table)
{
    const string name = table->getTableName();

    auto *consumer = dynamic_cast<Consumer *>(getExecutor(name));
    if (consumer != nullptr)
    {
        consumer->refillToSync(table);
        return true;
    }

    SWSS_LOG_ERROR("No consumer %s in Orch", name.c_str());
    return false;
}
201+
136202
/*
137203
- Validates reference has proper format which is [table_name:object_name]
138204
- validates table_name exists

orchagent/orch.h

+10-3
Original file line numberDiff line numberDiff line change
@@ -101,21 +101,24 @@ class Executor : public Selectable
101101

102102
class Consumer : public Executor {
103103
public:
104-
Consumer(TableConsumable *select, Orch *orch)
104+
Consumer(ConsumerTableBase *select, Orch *orch)
105105
: Executor(select, orch)
106106
{
107107
}
108108

109-
TableConsumable *getConsumerTable() const
109+
ConsumerTableBase *getConsumerTable() const
110110
{
111-
return static_cast<TableConsumable *>(getSelectable());
111+
return static_cast<ConsumerTableBase *>(getSelectable());
112112
}
113113

114114
string getTableName() const
115115
{
116116
return getConsumerTable()->getTableName();
117117
}
118118

119+
void addToSync(std::deque<KeyOpFieldsValuesTuple> &entries);
120+
void refillToSync();
121+
void refillToSync(Table* table);
119122
void execute();
120123
void drain();
121124

@@ -149,6 +152,10 @@ class Orch
149152

150153
vector<Selectable*> getSelectables();
151154

155+
// add the existing table data (left by warm reboot) to the consumer todo task list.
156+
bool addExistingData(Table *table);
157+
bool addExistingData(const string& tableName);
158+
152159
/* Iterate all consumers in m_consumerMap and run doTask(Consumer) */
153160
void doTask();
154161

orchagent/portsorch.cpp

+63-5
Original file line numberDiff line numberDiff line change
@@ -245,6 +245,9 @@ PortsOrch::PortsOrch(DBConnector *db, vector<table_name_with_pri_t> &tableNames)
245245
m_portStatusNotificationConsumer = new swss::NotificationConsumer(notificationsDb, "NOTIFICATIONS");
246246
auto portStatusNotificatier = new Notifier(m_portStatusNotificationConsumer, this);
247247
Orch::addExecutor("PORT_STATUS_NOTIFICATIONS", portStatusNotificatier);
248+
249+
// Try warm start
250+
bake();
248251
}
249252

250253
void PortsOrch::removeDefaultVlanMembers()
@@ -626,7 +629,7 @@ bool PortsOrch::bindAclTable(sai_object_id_t id, sai_object_id_t table_oid, sai_
626629
{
627630
// Bind this ACL group to LAG
628631
sai_attribute_t lag_attr;
629-
lag_attr.id = ingress ? SAI_LAG_ATTR_INGRESS_ACL : SAI_LAG_ATTR_EGRESS_ACL;
632+
lag_attr.id = ingress ? SAI_LAG_ATTR_INGRESS_ACL : SAI_LAG_ATTR_EGRESS_ACL;
630633
lag_attr.value.oid = groupOid;
631634

632635
status = sai_lag_api->set_lag_attribute(port.m_lag_id, &lag_attr);
@@ -1083,8 +1086,8 @@ bool PortsOrch::removePort(sai_object_id_t port_id)
10831086
Port p;
10841087
if (getPort(port_id, p))
10851088
{
1086-
PortUpdate update = {p, false };
1087-
notify(SUBJECT_TYPE_PORT_CHANGE, static_cast<void *>(&update));
1089+
PortUpdate update = {p, false };
1090+
notify(SUBJECT_TYPE_PORT_CHANGE, static_cast<void *>(&update));
10881091
}
10891092

10901093
sai_status_t status = sai_port_api->remove_port(port_id);
@@ -1156,8 +1159,8 @@ bool PortsOrch::initPort(const string &alias, const set<int> &lane_set)
11561159

11571160
m_flexCounterTable->set(key, fields);
11581161

1159-
PortUpdate update = {p, true };
1160-
notify(SUBJECT_TYPE_PORT_CHANGE, static_cast<void *>(&update));
1162+
PortUpdate update = {p, true };
1163+
notify(SUBJECT_TYPE_PORT_CHANGE, static_cast<void *>(&update));
11611164

11621165
SWSS_LOG_NOTICE("Initialized port %s", alias.c_str());
11631166
}
@@ -1177,6 +1180,55 @@ bool PortsOrch::initPort(const string &alias, const set<int> &lane_set)
11771180
return true;
11781181
}
11791182

1183+
bool PortsOrch::bake()
1184+
{
1185+
SWSS_LOG_ENTER();
1186+
1187+
// Check the APP_DB port table for warm reboot
1188+
vector<FieldValueTuple> tuples;
1189+
bool foundPortConfigDone = m_portTable->get("PortConfigDone", tuples);
1190+
SWSS_LOG_NOTICE("foundPortConfigDone = %d", foundPortConfigDone);
1191+
1192+
bool foundPortInitDone = m_portTable->get("PortInitDone", tuples);
1193+
SWSS_LOG_NOTICE("foundPortInitDone = %d", foundPortInitDone);
1194+
1195+
vector<string> keys;
1196+
m_portTable->getKeys(keys);
1197+
SWSS_LOG_NOTICE("m_portTable->getKeys %zd", keys.size());
1198+
1199+
if (!foundPortConfigDone || !foundPortInitDone)
1200+
{
1201+
SWSS_LOG_NOTICE("No port table, fallback to cold start");
1202+
cleanPortTable(keys);
1203+
return false;
1204+
}
1205+
1206+
if (m_portCount != keys.size() - 2)
1207+
{
1208+
// Invalid port table
1209+
SWSS_LOG_ERROR("Invalid port table: m_portCount");
1210+
cleanPortTable(keys);
1211+
return false;
1212+
}
1213+
1214+
addExistingData(m_portTable.get());
1215+
addExistingData(APP_LAG_TABLE_NAME);
1216+
addExistingData(APP_LAG_MEMBER_TABLE_NAME);
1217+
addExistingData(APP_VLAN_TABLE_NAME);
1218+
addExistingData(APP_VLAN_MEMBER_TABLE_NAME);
1219+
1220+
return true;
1221+
}
1222+
1223+
// Clean up port table
1224+
void PortsOrch::cleanPortTable(const vector<string>& keys)
1225+
{
1226+
for (auto& key : keys)
1227+
{
1228+
m_portTable->del(key);
1229+
}
1230+
}
1231+
11801232
void PortsOrch::doPortTask(Consumer &consumer)
11811233
{
11821234
SWSS_LOG_ENTER();
@@ -1279,6 +1331,12 @@ void PortsOrch::doPortTask(Consumer &consumer)
12791331
m_lanesAliasSpeedMap[lane_set] = make_tuple(alias, speed, an, fec_mode);
12801332
}
12811333

1334+
// TODO:
1335+
// Fix the issue below
1336+
// After PortConfigDone, while waiting for "PortInitDone" and the first gBufferOrch->isPortReady(alias),
1337+
// the complete m_lanesAliasSpeedMap may be populated again, so initPort() will be called more than once
1338+
// for the same port.
1339+
12821340
/* Once all ports received, go through the each port and perform appropriate actions:
12831341
* 1. Remove ports which don't exist anymore
12841342
* 2. Create new ports

orchagent/portsorch.h

+2
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,8 @@ class PortsOrch : public Orch, public Subject
5656
bool isInitDone();
5757

5858
map<string, Port>& getAllPorts();
59+
bool bake();
60+
void cleanPortTable(const vector<string>& keys);
5961
bool getBridgePort(sai_object_id_t id, Port &port);
6062
bool getPort(string alias, Port &port);
6163
bool getPort(sai_object_id_t id, Port &port);

0 commit comments

Comments
 (0)