Skip to content

Commit dba37b7

Browse files
committed
PFCWD recovery changes using DLR_INIT
1 parent 2489ad5 commit dba37b7

File tree

9 files changed

+226
-10
lines changed

9 files changed

+226
-10
lines changed

orchagent/orchdaemon.cpp

Lines changed: 20 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -597,13 +597,26 @@ bool OrchDaemon::init()
597597
SAI_QUEUE_ATTR_PAUSE_STATUS,
598598
};
599599

600-
m_orchList.push_back(new PfcWdSwOrch<PfcWdAclHandler, PfcWdLossyHandler>(
601-
m_configDb,
602-
pfc_wd_tables,
603-
portStatIds,
604-
queueStatIds,
605-
queueAttrIds,
606-
PFC_WD_POLL_MSECS));
600+
if(gSwitchOrch->checkPfcDlrInitEnable())
601+
{
602+
m_orchList.push_back(new PfcWdSwOrch<PfcWdDlrHandler, PfcWdLossyHandler>(
603+
m_configDb,
604+
pfc_wd_tables,
605+
portStatIds,
606+
queueStatIds,
607+
queueAttrIds,
608+
PFC_WD_POLL_MSECS));
609+
}
610+
else
611+
{
612+
m_orchList.push_back(new PfcWdSwOrch<PfcWdAclHandler, PfcWdLossyHandler>(
613+
m_configDb,
614+
pfc_wd_tables,
615+
portStatIds,
616+
queueStatIds,
617+
queueAttrIds,
618+
PFC_WD_POLL_MSECS));
619+
}
607620
} else if (platform == CISCO_8000_PLATFORM_SUBSTRING)
608621
{
609622
static const vector<sai_port_stat_t> portStatIds;

orchagent/pfcactionhandler.cpp

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -262,6 +262,49 @@ PfcWdSaiDlrInitHandler::~PfcWdSaiDlrInitHandler(void)
262262
}
263263
}
264264

265+
// Builds the lossy handler base for the given port/queue and then sets
// SAI_QUEUE_ATTR_PFC_DLR_INIT to true on the queue. A failed SAI call is
// logged only; construction still completes.
PfcWdDlrHandler::PfcWdDlrHandler(sai_object_id_t port, sai_object_id_t queue,
        uint8_t queueId, shared_ptr<Table> countersTable):
    PfcWdLossyHandler(port, queue, queueId, countersTable)
{
    SWSS_LOG_ENTER();

    // Set DLR init to true to start PFC deadlock recovery
    sai_attribute_t dlrInitAttr;
    dlrInitAttr.id = SAI_QUEUE_ATTR_PFC_DLR_INIT;
    dlrInitAttr.value.booldata = true;

    const sai_status_t rc = sai_queue_api->set_queue_attribute(queue, &dlrInitAttr);
    if (rc == SAI_STATUS_SUCCESS)
    {
        return;
    }

    SWSS_LOG_ERROR("Failed to set PFC DLR INIT on port 0x%" PRIx64 " queue 0x%" PRIx64
                   " queueId %d : %d",
                   port, queue, queueId, rc);
}
285+
286+
// Sets SAI_QUEUE_ATTR_PFC_DLR_INIT back to false on the handled queue.
// A failed SAI call is logged only; the destructor never throws on it.
PfcWdDlrHandler::~PfcWdDlrHandler(void)
{
    SWSS_LOG_ENTER();

    // Read the identifiers once; the same values feed the SAI call and the
    // error log so both always refer to the same queue.
    sai_object_id_t port = getPort();
    sai_object_id_t queue = getQueue();
    uint8_t queueId = getQueueId();

    sai_attribute_t attr;
    attr.id = SAI_QUEUE_ATTR_PFC_DLR_INIT;
    attr.value.booldata = false;

    // Set DLR init to false to stop PFC deadlock recovery
    sai_status_t status = sai_queue_api->set_queue_attribute(queue, &attr);
    if (status != SAI_STATUS_SUCCESS)
    {
        SWSS_LOG_ERROR("Failed to clear PFC DLR INIT on port 0x%" PRIx64 " queue 0x%" PRIx64
                       " queueId %d : %d", port, queue, queueId, status);
    }
}
307+
265308
PfcWdAclHandler::PfcWdAclHandler(sai_object_id_t port, sai_object_id_t queue,
266309
uint8_t queueId, shared_ptr<Table> countersTable):
267310
PfcWdLossyHandler(port, queue, queueId, countersTable)

orchagent/pfcactionhandler.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,14 @@ class PfcWdAclHandler: public PfcWdLossyHandler
115115
void updatePfcAclRule(shared_ptr<AclRule> rule, uint8_t queueId, string strTable, vector<sai_object_id_t> port);
116116
};
117117

118+
class PfcWdDlrHandler: public PfcWdLossyHandler
119+
{
120+
public:
121+
PfcWdDlrHandler(sai_object_id_t port, sai_object_id_t queue,
122+
uint8_t queueId, shared_ptr<Table> countersTable);
123+
virtual ~PfcWdDlrHandler(void);
124+
};
125+
118126
// PFC queue that implements drop action by draining queue with buffer of zero size
119127
class PfcWdZeroBufferHandler: public PfcWdLossyHandler
120128
{

orchagent/pfcwdorch.cpp

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,9 +27,12 @@
2727
#define PFC_WD_TC_MAX 8
2828
#define COUNTER_CHECK_POLL_TIMEOUT_SEC 1
2929

30+
extern sai_object_id_t gSwitchId;
31+
extern sai_switch_api_t* sai_switch_api;
3032
extern sai_port_api_t *sai_port_api;
3133
extern sai_queue_api_t *sai_queue_api;
3234

35+
extern SwitchOrch *gSwitchOrch;
3336
extern PortsOrch *gPortsOrch;
3437

3538
template <typename DropHandler, typename ForwardHandler>
@@ -229,6 +232,36 @@ task_process_status PfcWdOrch<DropHandler, ForwardHandler>::createEntry(const st
229232
SWSS_LOG_ERROR("Unsupported action %s for platform %s", value.c_str(), m_platform.c_str());
230233
return task_process_status::task_invalid_entry;
231234
}
235+
if(m_platform == BRCM_PLATFORM_SUBSTRING)
236+
{
237+
if(gSwitchOrch->checkPfcDlrInitEnable())
238+
{
239+
if(getPfcDlrPacketAction() == PfcWdAction::PFC_WD_ACTION_UNKNOWN)
240+
{
241+
sai_attribute_t attr;
242+
attr.id = SAI_SWITCH_ATTR_PFC_DLR_PACKET_ACTION;
243+
attr.value.u32 = (sai_uint32_t)action;
244+
245+
sai_status_t status = sai_switch_api->set_switch_attribute(gSwitchId, &attr);
246+
if(status != SAI_STATUS_SUCCESS)
247+
{
248+
SWSS_LOG_ERROR("Failed to set switch level PFC DLR packet action rv : %d", status);
249+
return task_process_status::task_invalid_entry;
250+
}
251+
setPfcDlrPacketAction(action);
252+
}
253+
else
254+
{
255+
if(getPfcDlrPacketAction() != action)
256+
{
257+
string DlrPacketAction = serializeAction(getPfcDlrPacketAction());
258+
SWSS_LOG_ERROR("Invalid PFC Watchdog action %s as switch level action %s is set",
259+
value.c_str(), DlrPacketAction.c_str());
260+
return task_process_status::task_invalid_entry;
261+
}
262+
}
263+
}
264+
}
232265
}
233266
else
234267
{
@@ -1064,4 +1097,5 @@ bool PfcWdSwOrch<DropHandler, ForwardHandler>::bake()
10641097
// Trick to keep member functions in a separate file
10651098
template class PfcWdSwOrch<PfcWdZeroBufferHandler, PfcWdLossyHandler>;
10661099
template class PfcWdSwOrch<PfcWdAclHandler, PfcWdLossyHandler>;
1100+
template class PfcWdSwOrch<PfcWdDlrHandler, PfcWdLossyHandler>;
10671101
template class PfcWdSwOrch<PfcWdSaiDlrInitHandler, PfcWdActionHandler>;

orchagent/pfcwdorch.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,8 @@ class PfcWdOrch: public Orch
4949

5050
virtual task_process_status createEntry(const string& key, const vector<FieldValueTuple>& data);
5151
task_process_status deleteEntry(const string& name);
52+
PfcWdAction getPfcDlrPacketAction() { return PfcDlrPacketAction; }
53+
void setPfcDlrPacketAction(PfcWdAction action) { PfcDlrPacketAction = action; }
5254

5355
protected:
5456
virtual bool startWdActionOnQueue(const string &event, sai_object_id_t queueId) = 0;
@@ -58,6 +60,7 @@ class PfcWdOrch: public Orch
5860

5961
shared_ptr<DBConnector> m_countersDb = nullptr;
6062
shared_ptr<Table> m_countersTable = nullptr;
63+
PfcWdAction PfcDlrPacketAction = PfcWdAction::PFC_WD_ACTION_UNKNOWN;
6164
};
6265

6366
template <typename DropHandler, typename ForwardHandler>

orchagent/qosorch.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1670,7 +1670,7 @@ bool QosOrch::applyDscpToTcMapToSwitch(sai_attr_id_t attr_id, sai_object_id_t ma
16701670
SWSS_LOG_ENTER();
16711671

16721672
/* Query DSCP_TO_TC QoS map at switch capability */
1673-
bool rv = gSwitchOrch->querySwitchDscpToTcCapability(SAI_OBJECT_TYPE_SWITCH, SAI_SWITCH_ATTR_QOS_DSCP_TO_TC_MAP);
1673+
bool rv = gSwitchOrch->querySwitchCapability(SAI_OBJECT_TYPE_SWITCH, SAI_SWITCH_ATTR_QOS_DSCP_TO_TC_MAP);
16741674
if (rv == false)
16751675
{
16761676
SWSS_LOG_ERROR("Switch level DSCP to TC QoS map configuration is not supported");

orchagent/switchorch.cpp

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,27 @@ const map<string, sai_packet_action_t> packet_action_map =
4848

4949
const std::set<std::string> switch_non_sai_attribute_set = {"ordered_ecmp"};
5050

51+
void SwitchOrch::set_switch_pfc_dlr_init_capability()
52+
{
53+
vector<FieldValueTuple> fvVector;
54+
55+
/* Query PFC DLR INIT capability */
56+
bool rv = querySwitchCapability(SAI_OBJECT_TYPE_QUEUE, SAI_QUEUE_ATTR_PFC_DLR_INIT);
57+
if (rv == false)
58+
{
59+
SWSS_LOG_INFO("Queue level PFC DLR INIT configuration is not supported");
60+
m_PfcDlrInitEnable = false;
61+
fvVector.emplace_back(SWITCH_CAPABILITY_TABLE_PFC_DLR_INIT_CAPABLE, "false");
62+
}
63+
else
64+
{
65+
SWSS_LOG_INFO("Queue level PFC DLR INIT configuration is supported");
66+
m_PfcDlrInitEnable = true;
67+
fvVector.emplace_back(SWITCH_CAPABILITY_TABLE_PFC_DLR_INIT_CAPABLE, "true");
68+
}
69+
set_switch_capability(fvVector);
70+
}
71+
5172
SwitchOrch::SwitchOrch(DBConnector *db, vector<TableConnector>& connectors, TableConnector switchTable):
5273
Orch(connectors),
5374
m_switchTable(switchTable.first, switchTable.second),
@@ -60,6 +81,7 @@ SwitchOrch::SwitchOrch(DBConnector *db, vector<TableConnector>& connectors, Tabl
6081
auto restartCheckNotifier = new Notifier(m_restartCheckNotificationConsumer, this, "RESTARTCHECK");
6182
Orch::addExecutor(restartCheckNotifier);
6283

84+
set_switch_pfc_dlr_init_capability();
6385
initSensorsTable();
6486
querySwitchTpidCapability();
6587
auto executorT = new ExecutableTimer(m_sensorsPollerTimer, this, "ASIC_SENSORS_POLL_TIMER");
@@ -762,7 +784,7 @@ void SwitchOrch::querySwitchTpidCapability()
762784
}
763785
}
764786

765-
bool SwitchOrch::querySwitchDscpToTcCapability(sai_object_type_t sai_object, sai_attr_id_t attr_id)
787+
bool SwitchOrch::querySwitchCapability(sai_object_type_t sai_object, sai_attr_id_t attr_id)
766788
{
767789
SWSS_LOG_ENTER();
768790

orchagent/switchorch.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
#define SWITCH_CAPABILITY_TABLE_PORT_TPID_CAPABLE "PORT_TPID_CAPABLE"
1212
#define SWITCH_CAPABILITY_TABLE_LAG_TPID_CAPABLE "LAG_TPID_CAPABLE"
1313
#define SWITCH_CAPABILITY_TABLE_ORDERED_ECMP_CAPABLE "ORDERED_ECMP_CAPABLE"
14+
#define SWITCH_CAPABILITY_TABLE_PFC_DLR_INIT_CAPABLE "PFC_DLR_INIT_CAPABLE"
1415

1516
struct WarmRestartCheck
1617
{
@@ -30,7 +31,9 @@ class SwitchOrch : public Orch
3031
void restartCheckReply(const std::string &op, const std::string &data, std::vector<swss::FieldValueTuple> &values);
3132
bool setAgingFDB(uint32_t sec);
3233
void set_switch_capability(const std::vector<swss::FieldValueTuple>& values);
33-
bool querySwitchDscpToTcCapability(sai_object_type_t sai_object, sai_attr_id_t attr_id);
34+
bool querySwitchCapability(sai_object_type_t sai_object, sai_attr_id_t attr_id);
35+
bool checkPfcDlrInitEnable() { return m_PfcDlrInitEnable; }
36+
void set_switch_pfc_dlr_init_capability();
3437

3538
// Return reference to ACL group created for each stage and the bind point is
3639
// the switch
@@ -80,6 +83,7 @@ class SwitchOrch : public Orch
8083
bool m_sensorsAvgTempSupported = true;
8184
bool m_vxlanSportUserModeEnabled = false;
8285
bool m_orderedEcmpEnable = false;
86+
bool m_PfcDlrInitEnable = false;
8387

8488
// Information contained in the request from
8589
// external program for orchagent pre-shutdown state check

tests/mock_tests/portsorch_ut.cpp

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,39 @@ namespace portsorch_test
2020

2121
using namespace std;
2222

23+
sai_queue_api_t ut_sai_queue_api;
24+
sai_queue_api_t *pold_sai_queue_api;
25+
int _sai_set_queue_attr_count = 0;
26+
27+
// Test stub for set_queue_attribute. For SAI_QUEUE_ATTR_PFC_DLR_INIT it
// increments _sai_set_queue_attr_count when the value is true and decrements
// it when the value is false; every other attribute is ignored.
// Always reports SAI_STATUS_SUCCESS.
sai_status_t _ut_stub_sai_set_queue_attribute(sai_object_id_t queue_id, const sai_attribute_t *attr)
{
    if (attr->id != SAI_QUEUE_ATTR_PFC_DLR_INIT)
    {
        return SAI_STATUS_SUCCESS;
    }

    _sai_set_queue_attr_count += attr->value.booldata ? 1 : -1;
    return SAI_STATUS_SUCCESS;
}
42+
43+
void _hook_sai_queue_api()
44+
{
45+
ut_sai_queue_api = *sai_queue_api;
46+
pold_sai_queue_api = sai_queue_api;
47+
ut_sai_queue_api.set_queue_attribute = _ut_stub_sai_set_queue_attribute;
48+
sai_queue_api = &ut_sai_queue_api;
49+
}
50+
51+
void _unhook_sai_queue_api()
52+
{
53+
sai_queue_api = pold_sai_queue_api;
54+
}
55+
2356
struct PortsOrchTest : public ::testing::Test
2457
{
2558
shared_ptr<swss::DBConnector> m_app_db;
@@ -361,6 +394,61 @@ namespace portsorch_test
361394
ASSERT_TRUE(ts.empty());
362395
}
363396

397+
// Verifies that constructing a PfcWdDlrHandler sets the queue DLR init
// attribute to true exactly once and that destroying it sets the attribute
// back to false, as counted by the stubbed set_queue_attribute.
TEST_F(PortsOrchTest, PfcDlrHandlerCallingDlrInitAttribute)
{
    // Keep the queue API stub installed for the whole test body and restore
    // the original API on every exit path, including an early return taken
    // by a failing ASSERT_*.
    struct QueueApiHookGuard
    {
        QueueApiHookGuard() { _hook_sai_queue_api(); }
        ~QueueApiHookGuard() { _unhook_sai_queue_api(); }
    } queueApiHookGuard;

    // The counter is a global; start from a known value so the result does
    // not depend on what ran before this test.
    _sai_set_queue_attr_count = 0;

    Table portTable = Table(m_app_db.get(), APP_PORT_TABLE_NAME);
    Table pgTable = Table(m_app_db.get(), APP_BUFFER_PG_TABLE_NAME);
    Table profileTable = Table(m_app_db.get(), APP_BUFFER_PROFILE_TABLE_NAME);
    Table poolTable = Table(m_app_db.get(), APP_BUFFER_POOL_TABLE_NAME);
    Table queueTable = Table(m_app_db.get(), APP_BUFFER_QUEUE_TABLE_NAME);

    // Get SAI default ports to populate DB
    auto ports = ut_helper::getInitialSaiPorts();

    // Populate port table with SAI ports
    for (const auto &it : ports)
    {
        portTable.set(it.first, it.second);
    }

    // Set PortConfigDone, PortInitDone
    portTable.set("PortConfigDone", { { "count", to_string(ports.size()) } });
    portTable.set("PortInitDone", { { "lanes", "0" } });

    // refill consumer
    gPortsOrch->addExistingData(&portTable);

    // Apply configuration :
    //  create ports
    static_cast<Orch *>(gPortsOrch)->doTask();

    // Apply configuration
    //  ports
    static_cast<Orch *>(gPortsOrch)->doTask();

    ASSERT_TRUE(gPortsOrch->allPortsReady());

    // No more tasks
    vector<string> ts;
    gPortsOrch->dumpPendingTasks(ts);
    ASSERT_TRUE(ts.empty());

    // Simulate storm drop handler started on Ethernet0 TC 3
    Port port;
    gPortsOrch->getPort("Ethernet0", port);
    auto countersTable = make_shared<Table>(m_counters_db.get(), COUNTERS_TABLE);
    auto dropHandler = make_unique<PfcWdDlrHandler>(port.m_port_id, port.m_queue_ids[3], 3, countersTable);

    // Construction must have set DLR init to true exactly once
    ASSERT_EQ(_sai_set_queue_attr_count, 1);

    // Destruction must set DLR init back to false, returning the count to 0
    dropHandler.reset();
    ASSERT_EQ(_sai_set_queue_attr_count, 0);
}
451+
364452
TEST_F(PortsOrchTest, PfcZeroBufferHandler)
365453
{
366454
Table portTable = Table(m_app_db.get(), APP_PORT_TABLE_NAME);
@@ -799,4 +887,5 @@ namespace portsorch_test
799887

800888
ASSERT_FALSE(bridgePortCalledBeforeLagMember); // bridge port created on lag before lag member was created
801889
}
890+
802891
}

0 commit comments

Comments
 (0)