Skip to content

Commit cde242b

Browse files
vsenchyshyn authored and wendani committed
[orchagent]: Added support of PFC WD for BFN platform (sonic-net#823)
* [orchagent]: Added support of PFC WD for BFN platform
  Signed-off-by: Vitaliy Senchyshyn <[email protected]>
* Fixed review comments
  Signed-off-by: Vitaliy Senchyshyn <[email protected]>
* Use PFC WD ACL handler for BFN platform
1 parent 9d69dd5 commit cde242b

File tree

2 files changed

+88
-62
lines changed

2 files changed

+88
-62
lines changed

orchagent/orchdaemon.cpp

+24-9
Original file line number | Diff line number | Diff line change
@@ -223,8 +223,9 @@ bool OrchDaemon::init()
223223
CFG_PFC_WD_TABLE_NAME
224224
};
225225

226-
if (platform == MLNX_PLATFORM_SUBSTRING
227-
|| platform == NPS_PLATFORM_SUBSTRING)
226+
if ((platform == MLNX_PLATFORM_SUBSTRING)
227+
|| (platform == BFN_PLATFORM_SUBSTRING)
228+
|| (platform == NPS_PLATFORM_SUBSTRING))
228229
{
229230

230231
static const vector<sai_port_stat_t> portStatIds =
@@ -255,13 +256,27 @@ bool OrchDaemon::init()
255256

256257
static const vector<sai_queue_attr_t> queueAttrIds;
257258

258-
m_orchList.push_back(new PfcWdSwOrch<PfcWdZeroBufferHandler, PfcWdLossyHandler>(
259-
m_configDb,
260-
pfc_wd_tables,
261-
portStatIds,
262-
queueStatIds,
263-
queueAttrIds,
264-
PFC_WD_POLL_MSECS));
259+
if ((platform == MLNX_PLATFORM_SUBSTRING)
260+
|| (platform == NPS_PLATFORM_SUBSTRING))
261+
{
262+
m_orchList.push_back(new PfcWdSwOrch<PfcWdZeroBufferHandler, PfcWdLossyHandler>(
263+
m_configDb,
264+
pfc_wd_tables,
265+
portStatIds,
266+
queueStatIds,
267+
queueAttrIds,
268+
PFC_WD_POLL_MSECS));
269+
}
270+
else if (platform == BFN_PLATFORM_SUBSTRING)
271+
{
272+
m_orchList.push_back(new PfcWdSwOrch<PfcWdAclHandler, PfcWdLossyHandler>(
273+
m_configDb,
274+
pfc_wd_tables,
275+
portStatIds,
276+
queueStatIds,
277+
queueAttrIds,
278+
PFC_WD_POLL_MSECS));
279+
}
265280
}
266281
else if (platform == BRCM_PLATFORM_SUBSTRING)
267282
{

orchagent/pfc_detect_barefoot.lua

+64-53
Original file line number | Diff line number | Diff line change
@@ -21,70 +21,81 @@ for i = n, 1, -1 do
2121
local is_deadlock = false
2222
local pfc_wd_status = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'PFC_WD_STATUS')
2323
local pfc_wd_action = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'PFC_WD_ACTION')
24-
if pfc_wd_status == 'operational' or pfc_wd_action == 'alert' then
25-
local detection_time = tonumber(redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'PFC_WD_DETECTION_TIME'))
26-
local time_left = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'PFC_WD_DETECTION_TIME_LEFT')
27-
if not time_left then
28-
time_left = detection_time
29-
else
30-
time_left = tonumber(time_left)
31-
end
3224

33-
local queue_index = redis.call('HGET', 'COUNTERS_QUEUE_INDEX_MAP', KEYS[i])
34-
local port_id = redis.call('HGET', 'COUNTERS_QUEUE_PORT_MAP', KEYS[i])
35-
local pfc_rx_pkt_key = 'SAI_PORT_STAT_PFC_' .. queue_index .. '_RX_PKTS'
36-
local pfc_on2off_key = 'SAI_PORT_STAT_PFC_' .. queue_index .. '_ON2OFF_RX_PKTS'
37-
25+
local big_red_switch_mode = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'BIG_RED_SWITCH_MODE')
26+
if not big_red_switch_mode and (pfc_wd_status == 'operational' or pfc_wd_action == 'alert') then
27+
local detection_time = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'PFC_WD_DETECTION_TIME')
28+
if detection_time then
29+
detection_time = tonumber(detection_time)
30+
local time_left = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'PFC_WD_DETECTION_TIME_LEFT')
31+
if not time_left then
32+
time_left = detection_time
33+
else
34+
time_left = tonumber(time_left)
35+
end
3836

39-
-- Get all counters
40-
local occupancy_bytes = tonumber(redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'SAI_QUEUE_STAT_CURR_OCCUPANCY_BYTES'))
41-
local packets = tonumber(redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'SAI_QUEUE_STAT_PACKETS'))
42-
local pfc_rx_packets = tonumber(redis.call('HGET', counters_table_name .. ':' .. port_id, pfc_rx_pkt_key))
43-
local pfc_on2off = tonumber(redis.call('HGET', counters_table_name .. ':' .. port_id, pfc_on2off_key))
44-
local queue_pause_status = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'SAI_QUEUE_ATTR_PAUSE_STATUS')
45-
46-
local packets_last = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'SAI_QUEUE_STAT_PACKETS_last')
47-
local pfc_rx_packets_last = redis.call('HGET', counters_table_name .. ':' .. port_id, pfc_rx_pkt_key .. '_last')
48-
local pfc_on2off_last = redis.call('HGET', counters_table_name .. ':' .. port_id, pfc_on2off_key .. '_last')
49-
local queue_pause_status_last = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'SAI_QUEUE_ATTR_PAUSE_STATUS_last')
37+
local queue_index = redis.call('HGET', 'COUNTERS_QUEUE_INDEX_MAP', KEYS[i])
38+
local port_id = redis.call('HGET', 'COUNTERS_QUEUE_PORT_MAP', KEYS[i])
39+
local pfc_rx_pkt_key = 'SAI_PORT_STAT_PFC_' .. queue_index .. '_RX_PKTS'
40+
local pfc_duration_key = 'SAI_PORT_STAT_PFC_' .. queue_index .. '_RX_PAUSE_DURATION'
5041

51-
-- DEBUG CODE START. Uncomment to enable
52-
local debug_storm = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'DEBUG_STORM')
53-
-- DEBUG CODE END.
42+
-- Get all counters
43+
local occupancy_bytes = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'SAI_QUEUE_STAT_CURR_OCCUPANCY_BYTES')
44+
local packets = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'SAI_QUEUE_STAT_PACKETS')
45+
local pfc_rx_packets = redis.call('HGET', counters_table_name .. ':' .. port_id, pfc_rx_pkt_key)
46+
local pfc_duration = redis.call('HGET', counters_table_name .. ':' .. port_id, pfc_duration_key)
5447

55-
-- If this is not a first run, then we have last values available
56-
if packets_last and pfc_rx_packets_last and pfc_on2off_last and queue_pause_status_last then
57-
packets_last = tonumber(packets_last)
58-
pfc_rx_packets_last = tonumber(pfc_rx_packets_last)
59-
pfc_on2off_last = tonumber(pfc_on2off_last)
48+
if occupancy_bytes and packets and pfc_rx_packets and pfc_duration then
49+
occupancy_bytes = tonumber(occupancy_bytes)
50+
packets = tonumber(packets)
51+
pfc_rx_packets = tonumber(pfc_rx_packets)
52+
pfc_duration = tonumber(pfc_duration)
6053

61-
-- Check actual condition of queue being in PFC storm
62-
if (occupancy_bytes > 0 and packets - packets_last == 0 and pfc_rx_packets - pfc_rx_packets_last > 0) or
54+
local packets_last = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'SAI_QUEUE_STAT_PACKETS_last')
55+
local pfc_rx_packets_last = redis.call('HGET', counters_table_name .. ':' .. port_id, pfc_rx_pkt_key .. '_last')
56+
local pfc_duration_last = redis.call('HGET', counters_table_name .. ':' .. port_id, pfc_duration_key .. '_last')
6357
-- DEBUG CODE START. Uncomment to enable
64-
(debug_storm == "enabled") or
58+
local debug_storm = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'DEBUG_STORM')
6559
-- DEBUG CODE END.
66-
(occupancy_bytes == 0 and pfc_rx_packets - pfc_rx_packets_last > 0 and pfc_on2off - pfc_on2off_last == 0 and queue_pause_status_last == 'true' and queue_pause_status == 'true') then
67-
if time_left <= poll_time then
68-
redis.call('PUBLISH', 'PFC_WD_ACTION', '["' .. KEYS[i] .. '","storm"]')
69-
is_deadlock = true
70-
time_left = detection_time
71-
else
72-
time_left = time_left - poll_time
60+
61+
-- If this is not a first run, then we have last values available
62+
if packets_last and pfc_rx_packets_last and pfc_duration_last then
63+
packets_last = tonumber(packets_last)
64+
pfc_rx_packets_last = tonumber(pfc_rx_packets_last)
65+
pfc_duration_last = tonumber(pfc_duration_last)
66+
67+
-- Check actual condition of queue being in PFC storm
68+
if (occupancy_bytes > 0 and packets - packets_last == 0 and pfc_rx_packets - pfc_rx_packets_last > 0) or
69+
-- DEBUG CODE START. Uncomment to enable
70+
(debug_storm == "enabled") or
71+
-- DEBUG CODE END.
72+
(occupancy_bytes == 0 and packets - packets_last == 0 and (pfc_duration - pfc_duration_last) > poll_time * 0.8) then
73+
if time_left <= poll_time then
74+
redis.call('HDEL', counters_table_name .. ':' .. port_id, pfc_rx_pkt_key .. '_last')
75+
redis.call('HDEL', counters_table_name .. ':' .. port_id, pfc_duration_key .. '_last')
76+
redis.call('PUBLISH', 'PFC_WD_ACTION', '["' .. KEYS[i] .. '","storm"]')
77+
is_deadlock = true
78+
time_left = detection_time
79+
else
80+
time_left = time_left - poll_time
81+
end
82+
else
83+
if pfc_wd_action == 'alert' and pfc_wd_status ~= 'operational' then
84+
redis.call('PUBLISH', 'PFC_WD_ACTION', '["' .. KEYS[i] .. '","restore"]')
85+
end
86+
time_left = detection_time
87+
end
7388
end
74-
else
75-
if pfc_wd_action == 'alert' and pfc_wd_status ~= 'operational' then
76-
redis.call('PUBLISH', 'PFC_WD_ACTION', '["' .. KEYS[i] .. '","restore"]')
89+
90+
-- Save values for next run
91+
redis.call('HSET', counters_table_name .. ':' .. KEYS[i], 'SAI_QUEUE_STAT_PACKETS_last', packets)
92+
redis.call('HSET', counters_table_name .. ':' .. KEYS[i], 'PFC_WD_DETECTION_TIME_LEFT', time_left)
93+
if is_deadlock == false then
94+
redis.call('HSET', counters_table_name .. ':' .. port_id, pfc_rx_pkt_key .. '_last', pfc_rx_packets)
95+
redis.call('HSET', counters_table_name .. ':' .. port_id, pfc_duration_key .. '_last', pfc_duration)
7796
end
78-
time_left = detection_time
7997
end
8098
end
81-
82-
-- Save values for next run
83-
redis.call('HSET', counters_table_name .. ':' .. KEYS[i], 'SAI_QUEUE_ATTR_PAUSE_STATUS_last', queue_pause_status)
84-
redis.call('HSET', counters_table_name .. ':' .. KEYS[i], 'SAI_QUEUE_STAT_PACKETS_last', packets)
85-
redis.call('HSET', counters_table_name .. ':' .. KEYS[i], 'PFC_WD_DETECTION_TIME_LEFT', time_left)
86-
redis.call('HSET', counters_table_name .. ':' .. port_id, pfc_rx_pkt_key .. '_last', pfc_rx_packets)
87-
redis.call('HSET', counters_table_name .. ':' .. port_id, pfc_on2off_key .. '_last', pfc_on2off)
8899
end
89100
end
90101

0 commit comments

Comments
 (0)