Skip to content

Commit 9b9ac4f

Browse files
stephenxs authored and StormLiangMS committed
Add more debug information when PFC WD is triggered (#2858)
Add more debug information when PFC WD is triggered
1 parent 5d80f57 commit 9b9ac4f

File tree

3 files changed

+52
-12
lines changed

3 files changed

+52
-12
lines changed

orchagent/pfc_detect_mellanox.lua

+17 -1
Original file line number | Diff line number | Diff line change
@@ -12,6 +12,17 @@ local rets = {}
1212

1313
redis.call('SELECT', counters_db)
1414

15+
-- Record the polling time
16+
local timestamp_last = redis.call('HGET', 'TIMESTAMP', 'pfcwd_poll_timestamp_last')
17+
local timestamp_struct = redis.call('TIME')
18+
local timestamp_current = timestamp_struct[1] + timestamp_struct[2] / 1000000
19+
local timestamp_string = tostring(timestamp_current)
20+
redis.call('HSET', 'TIMESTAMP', 'pfcwd_poll_timestamp_last', timestamp_string)
21+
local real_poll_time = poll_time
22+
if timestamp_last ~= false then
23+
real_poll_time = (timestamp_current - tonumber(timestamp_last)) * 1000000
24+
end
25+
1526
-- Iterate through each queue
1627
local n = table.getn(KEYS)
1728
for i = n, 1, -1 do
@@ -78,7 +89,12 @@ for i = n, 1, -1 do
7889
if time_left <= poll_time then
7990
redis.call('HDEL', counters_table_name .. ':' .. port_id, pfc_rx_pkt_key .. '_last')
8091
redis.call('HDEL', counters_table_name .. ':' .. port_id, pfc_duration_key .. '_last')
81-
redis.call('PUBLISH', 'PFC_WD_ACTION', '["' .. KEYS[i] .. '","storm"]')
92+
local occupancy_string = '"occupancy","' .. tostring(occupancy_bytes) .. '",'
93+
local packets_string = '"packets","' .. tostring(packets) .. '","packets_last","' .. tostring(packets_last) .. '",'
94+
local pfc_rx_packets_string = '"pfc_rx_packets","' .. tostring(pfc_rx_packets) .. '","pfc_rx_packets_last","' .. tostring(pfc_rx_packets_last) .. '",'
95+
local storm_condition_string = '"pfc_duration","' .. tostring(pfc_duration) .. '","pfc_duration_last","' .. tostring(pfc_duration_last) .. '",'
96+
-- Debug timestamps appended to the storm notification. timestamp_last is false when no previous poll timestamp was stored (the script compares it against false above), so it is passed through tostring() to avoid a "concatenate a boolean value" error that would abort the script.
local timestamps = '"timestamp","' .. timestamp_string .. '","timestamp_last","' .. tostring(timestamp_last) .. '","real_poll_time","' .. tostring(real_poll_time) .. '"'
97+
redis.call('PUBLISH', 'PFC_WD_ACTION', '["' .. KEYS[i] .. '","storm",' .. occupancy_string .. packets_string .. pfc_rx_packets_string .. storm_condition_string .. timestamps .. ']')
8298
is_deadlock = true
8399
time_left = detection_time
84100
else

orchagent/pfcwdorch.cpp

+32 -8
Original file line number | Diff line number | Diff line change
@@ -913,10 +913,20 @@ void PfcWdSwOrch<DropHandler, ForwardHandler>::doTask(swss::NotificationConsumer
913913

914914
wdNotification.pop(queueIdStr, event, values);
915915

916+
string info;
917+
for (auto &fv : values)
918+
{
919+
info += fvField(fv) + ":" + fvValue(fv) + "|";
920+
}
921+
if (!info.empty())
922+
{
923+
info.pop_back();
924+
}
925+
916926
sai_object_id_t queueId = SAI_NULL_OBJECT_ID;
917927
sai_deserialize_object_id(queueIdStr, queueId);
918928

919-
if (!startWdActionOnQueue(event, queueId))
929+
if (!startWdActionOnQueue(event, queueId, info))
920930
{
921931
SWSS_LOG_ERROR("Failed to start PFC watchdog %s event action on queue %s", event.c_str(), queueIdStr.c_str());
922932
}
@@ -939,26 +949,40 @@ void PfcWdSwOrch<DropHandler, ForwardHandler>::doTask(SelectableTimer &timer)
939949

940950
template <typename DropHandler, typename ForwardHandler>
941951
void PfcWdSwOrch<DropHandler, ForwardHandler>::report_pfc_storm(
942-
sai_object_id_t id, const PfcWdQueueEntry *entry)
952+
sai_object_id_t id, const PfcWdQueueEntry *entry, const string &info)
943953
{
944954
event_params_t params = {
945955
{ "ifname", entry->portAlias },
946956
{ "queue_index", to_string(entry->index) },
947957
{ "queue_id", to_string(id) },
948958
{ "port_id", to_string(entry->portId) }};
949959

950-
SWSS_LOG_NOTICE(
951-
"PFC Watchdog detected PFC storm on port %s, queue index %d, queue id 0x%" PRIx64 " and port id 0x%" PRIx64 ".",
960+
if (info.empty())
961+
{
962+
SWSS_LOG_NOTICE(
963+
"PFC Watchdog detected PFC storm on port %s, queue index %d, queue id 0x%" PRIx64 " and port id 0x%" PRIx64,
952964
entry->portAlias.c_str(),
953965
entry->index,
954966
id,
955967
entry->portId);
968+
}
969+
else
970+
{
971+
SWSS_LOG_NOTICE(
972+
"PFC Watchdog detected PFC storm on port %s, queue index %d, queue id 0x%" PRIx64 " and port id 0x%" PRIx64 ", additional info: %s.",
973+
entry->portAlias.c_str(),
974+
entry->index,
975+
id,
976+
entry->portId,
977+
info.c_str());
978+
params["additional_info"] = info;
979+
}
956980

957981
event_publish(g_events_handle, "pfc-storm", &params);
958982
}
959983

960984
template <typename DropHandler, typename ForwardHandler>
961-
bool PfcWdSwOrch<DropHandler, ForwardHandler>::startWdActionOnQueue(const string &event, sai_object_id_t queueId)
985+
bool PfcWdSwOrch<DropHandler, ForwardHandler>::startWdActionOnQueue(const string &event, sai_object_id_t queueId, const string &info)
962986
{
963987
auto entry = m_entryMap.find(queueId);
964988
if (entry == m_entryMap.end())
@@ -979,7 +1003,7 @@ bool PfcWdSwOrch<DropHandler, ForwardHandler>::startWdActionOnQueue(const string
9791003
{
9801004
if (entry->second.handler == nullptr)
9811005
{
982-
report_pfc_storm(entry->first, &entry->second);
1006+
report_pfc_storm(entry->first, &entry->second, info);
9831007

9841008
entry->second.handler = make_shared<PfcWdActionHandler>(
9851009
entry->second.portId,
@@ -996,7 +1020,7 @@ bool PfcWdSwOrch<DropHandler, ForwardHandler>::startWdActionOnQueue(const string
9961020
{
9971021
if (entry->second.handler == nullptr)
9981022
{
999-
report_pfc_storm(entry->first, &entry->second);
1023+
report_pfc_storm(entry->first, &entry->second, info);
10001024

10011025
entry->second.handler = make_shared<DropHandler>(
10021026
entry->second.portId,
@@ -1013,7 +1037,7 @@ bool PfcWdSwOrch<DropHandler, ForwardHandler>::startWdActionOnQueue(const string
10131037
{
10141038
if (entry->second.handler == nullptr)
10151039
{
1016-
report_pfc_storm(entry->first, &entry->second);
1040+
report_pfc_storm(entry->first, &entry->second, info);
10171041

10181042
entry->second.handler = make_shared<ForwardHandler>(
10191043
entry->second.portId,

orchagent/pfcwdorch.h

+3 -3
Original file line number | Diff line number | Diff line change
@@ -60,7 +60,7 @@ class PfcWdOrch: public Orch
6060
void setPfcDlrPacketAction(PfcWdAction action) { PfcDlrPacketAction = action; }
6161

6262
protected:
63-
virtual bool startWdActionOnQueue(const string &event, sai_object_id_t queueId) = 0;
63+
virtual bool startWdActionOnQueue(const string &event, sai_object_id_t queueId, const string &info="") = 0;
6464
string m_platform = "";
6565
private:
6666

@@ -96,7 +96,7 @@ class PfcWdSwOrch: public PfcWdOrch<DropHandler, ForwardHandler>
9696
void doTask() override;
9797

9898
protected:
99-
bool startWdActionOnQueue(const string &event, sai_object_id_t queueId) override;
99+
bool startWdActionOnQueue(const string &event, sai_object_id_t queueId, const string &info="") override;
100100

101101
private:
102102
struct PfcWdQueueEntry
@@ -128,7 +128,7 @@ class PfcWdSwOrch: public PfcWdOrch<DropHandler, ForwardHandler>
128128
void enableBigRedSwitchMode();
129129
void setBigRedSwitchMode(string value);
130130

131-
void report_pfc_storm(sai_object_id_t id, const PfcWdQueueEntry *);
131+
void report_pfc_storm(sai_object_id_t id, const PfcWdQueueEntry *, const string&);
132132

133133
map<sai_object_id_t, PfcWdQueueEntry> m_entryMap;
134134
map<sai_object_id_t, PfcWdQueueEntry> m_brsEntryMap;

0 commit comments

Comments
 (0)