Skip to content

Commit 34b582c

Browse files
tonytitus authored and lguohan committed
[orchagent] Add support for Innovium platform (sonic-net#1005)
Signed-off-by: Tony Titus [email protected]
1 parent 8fcf43d commit 34b582c

File tree

4 files changed

+118
-3
lines changed

4 files changed

+118
-3
lines changed

orchagent/Makefile.am

+1
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ CFLAGS_SAI = -I /usr/include/sai
55
swssdir = $(datadir)/swss
66

77
dist_swss_DATA = \
8+
pfc_detect_innovium.lua \
89
pfc_detect_mellanox.lua \
910
pfc_detect_broadcom.lua \
1011
pfc_detect_barefoot.lua \

orchagent/orch.h

+1
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ const char range_specifier = '-';
3232
const char config_db_key_delimiter = '|';
3333
const char state_db_key_delimiter = '|';
3434

35+
#define INVM_PLATFORM_SUBSTRING "innovium"
3536
#define MLNX_PLATFORM_SUBSTRING "mellanox"
3637
#define BRCM_PLATFORM_SUBSTRING "broadcom"
3738
#define BFN_PLATFORM_SUBSTRING "barefoot"

orchagent/orchdaemon.cpp

+5-3
Original file line numberDiff line numberDiff line change
@@ -45,13 +45,13 @@ OrchDaemon::OrchDaemon(DBConnector *applDb, DBConnector *configDb, DBConnector *
4545
OrchDaemon::~OrchDaemon()
4646
{
4747
SWSS_LOG_ENTER();
48-
48+
4949
/*
5050
* Some orchagents call other agents in their destructor.
5151
* To avoid accessing deleted agent, do deletion in reverse order.
5252
* NOTE: This is still not a robust solution, as order in this list
53-
* does not strictly match the order of construction of agents.
54-
* For a robust solution, first some cleaning/house-keeping in
53+
* does not strictly match the order of construction of agents.
54+
* For a robust solution, first some cleaning/house-keeping in
5555
* orchagents management is in order.
5656
* For now it fixes a possible crash during process exit.
5757
*/
@@ -245,6 +245,7 @@ bool OrchDaemon::init()
245245
};
246246

247247
if ((platform == MLNX_PLATFORM_SUBSTRING)
248+
|| (platform == INVM_PLATFORM_SUBSTRING)
248249
|| (platform == BFN_PLATFORM_SUBSTRING)
249250
|| (platform == NPS_PLATFORM_SUBSTRING))
250251
{
@@ -278,6 +279,7 @@ bool OrchDaemon::init()
278279
static const vector<sai_queue_attr_t> queueAttrIds;
279280

280281
if ((platform == MLNX_PLATFORM_SUBSTRING)
282+
|| (platform == INVM_PLATFORM_SUBSTRING)
281283
|| (platform == NPS_PLATFORM_SUBSTRING))
282284
{
283285
m_orchList.push_back(new PfcWdSwOrch<PfcWdZeroBufferHandler, PfcWdLossyHandler>(

orchagent/pfc_detect_innovium.lua

+111
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,111 @@
1+
-- PFC watchdog storm-detection script (pfc_detect_innovium.lua), executed inside redis.
-- For every queue ID in KEYS it reads queue and port counters from the counters
-- table and publishes "storm" / "restore" messages on the 'PFC_WD_ACTION' channel.
-- It also stores per-queue / per-port "*_last" values and the remaining detection
-- time back into the same hashes so the next invocation can compute deltas.
--
-- KEYS - queue IDs
2+
-- ARGV[1] - counters db index
3+
-- ARGV[2] - counters table name
4+
-- ARGV[3] - poll time interval (compared directly against PFC_WD_DETECTION_TIME
--           and the pause-duration delta; the unit is not visible in this script)
5+
-- returns the table `rets`; nothing in this script inserts into it, so it is
-- always empty and affected queues are reported through PUBLISH instead
6+
7+
local counters_db = ARGV[1]
8+
local counters_table_name = ARGV[2]
9+
local poll_time = tonumber(ARGV[3])
10+
11+
-- Returned at the end of the script; never populated below.
local rets = {}
12+
13+
-- Switch the connection to the counters database before any HGET/HSET.
redis.call('SELECT', counters_db)
14+
15+
-- Iterate through each queue (from the last entry of KEYS down to the first)
16+
local n = table.getn(KEYS)
17+
for i = n, 1, -1 do
18+
-- NOTE(review): counter_keys, counter_num and old_counter_num are assigned here
-- but never read anywhere else in this script.
local counter_keys = redis.call('HKEYS', counters_table_name .. ':' .. KEYS[i])
19+
local counter_num = 0
20+
local old_counter_num = 0
21+
-- Set to true only when a "storm" message is published for this queue.
local is_deadlock = false
22+
local pfc_wd_status = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'PFC_WD_STATUS')
23+
local pfc_wd_action = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'PFC_WD_ACTION')
24+
25+
local big_red_switch_mode = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'BIG_RED_SWITCH_MODE')
26+
-- Evaluate the queue only when BIG_RED_SWITCH_MODE is not set on it and the
-- watchdog status is 'operational' or the configured action is 'alert'.
if not big_red_switch_mode and (pfc_wd_status == 'operational' or pfc_wd_action == 'alert') then
27+
local detection_time = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'PFC_WD_DETECTION_TIME')
28+
-- Queues without a PFC_WD_DETECTION_TIME field are skipped entirely.
if detection_time then
29+
detection_time = tonumber(detection_time)
30+
local time_left = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'PFC_WD_DETECTION_TIME_LEFT')
31+
-- No remaining time stored yet: start the countdown from the full detection time.
if not time_left then
32+
time_left = detection_time
33+
else
34+
time_left = tonumber(time_left)
35+
end
36+
37+
-- Look up the queue's index and its port; the PFC counters below are read
-- from the port's hash using a field name built from the queue index.
-- NOTE(review): neither lookup result is checked before it is used in the
-- string concatenations that follow.
local queue_index = redis.call('HGET', 'COUNTERS_QUEUE_INDEX_MAP', KEYS[i])
38+
local port_id = redis.call('HGET', 'COUNTERS_QUEUE_PORT_MAP', KEYS[i])
39+
local pfc_rx_pkt_key = 'SAI_PORT_STAT_PFC_' .. queue_index .. '_RX_PKTS'
40+
local pfc_duration_key = 'SAI_PORT_STAT_PFC_' .. queue_index .. '_RX_PAUSE_DURATION'
41+
42+
-- Get all counters
43+
local occupancy_bytes = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'SAI_QUEUE_STAT_CURR_OCCUPANCY_BYTES')
44+
local packets = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'SAI_QUEUE_STAT_PACKETS')
45+
local pfc_rx_packets = redis.call('HGET', counters_table_name .. ':' .. port_id, pfc_rx_pkt_key)
46+
local pfc_duration = redis.call('HGET', counters_table_name .. ':' .. port_id, pfc_duration_key)
47+
48+
-- Do nothing for this queue in this run unless all four counters are present.
if occupancy_bytes and packets and pfc_rx_packets and pfc_duration then
49+
occupancy_bytes = tonumber(occupancy_bytes)
50+
packets = tonumber(packets)
51+
pfc_rx_packets = tonumber(pfc_rx_packets)
52+
pfc_duration = tonumber(pfc_duration)
53+
54+
-- Values saved by the previous run of this script (see "Save values for next run").
local packets_last = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'SAI_QUEUE_STAT_PACKETS_last')
55+
local pfc_rx_packets_last = redis.call('HGET', counters_table_name .. ':' .. port_id, pfc_rx_pkt_key .. '_last')
56+
local pfc_duration_last = redis.call('HGET', counters_table_name .. ':' .. port_id, pfc_duration_key .. '_last')
57+
-- DEBUG CODE START.
-- NOTE(review): this read is active (not commented out); comment out the line
-- below and the matching condition further down to disable the debug hook.
58+
local debug_storm = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'DEBUG_STORM')
59+
-- DEBUG CODE END.
60+
61+
-- If this is not a first run, then we have last values available
62+
if packets_last and pfc_rx_packets_last and pfc_duration_last then
63+
packets_last = tonumber(packets_last)
64+
pfc_rx_packets_last = tonumber(pfc_rx_packets_last)
65+
pfc_duration_last = tonumber(pfc_duration_last)
66+
67+
-- Check actual condition of queue being in PFC storm
68+
-- if (occupancy_bytes > 0 and packets - packets_last == 0 and pfc_rx_packets - pfc_rx_packets_last > 0) then
69+
-- redis.call('HSET', counters_table_name .. ':' .. KEYS[i], 'K7_debug_1', 'YES')
70+
71+
-- if (debug_storm == "enabled") then
72+
-- redis.call('HSET', counters_table_name .. ':' .. KEYS[i], 'K7_debug_2', 'YES')
73+
74+
-- if (occupancy_bytes == 0 and packets - packets_last == 0 and (pfc_duration - pfc_duration_last) > poll_time * 0.8) then
75+
-- redis.call('HSET', counters_table_name .. ':' .. KEYS[i], 'K7_debug_3', 'YES')
76+
77+
78+
-- The queue is treated as being in a storm when any of these holds:
--   1. occupancy is non-zero, the queue packet counter did not change since
--      the last run, and the PFC RX packet counter increased;
--   2. the DEBUG_STORM field of the queue equals "enabled";
--   3. occupancy is zero, the PFC RX packet counter increased, and the pause
--      duration grew by more than 80% of poll_time.
if (occupancy_bytes > 0 and packets - packets_last == 0 and pfc_rx_packets - pfc_rx_packets_last > 0) or
79+
-- DEBUG CODE START. (active; see the note where debug_storm is read)
80+
(debug_storm == "enabled") or
81+
-- DEBUG CODE END.
82+
(occupancy_bytes == 0 and pfc_rx_packets - pfc_rx_packets_last > 0 and (pfc_duration - pfc_duration_last) > poll_time * 0.8) then
83+
-- Countdown exhausted: publish the storm and re-arm the countdown.
if time_left <= poll_time then
84+
redis.call('PUBLISH', 'PFC_WD_ACTION', '["' .. KEYS[i] .. '","storm"]')
85+
is_deadlock = true
86+
time_left = detection_time
87+
else
88+
-- Condition holds but detection time has not elapsed yet: consume one poll interval.
time_left = time_left - poll_time
89+
end
90+
else
91+
-- No storm condition: publish "restore" when the action is 'alert' and the
-- status is something other than 'operational', then re-arm the countdown.
if pfc_wd_action == 'alert' and pfc_wd_status ~= 'operational' then
92+
redis.call('PUBLISH', 'PFC_WD_ACTION', '["' .. KEYS[i] .. '","restore"]')
93+
end
94+
time_left = detection_time
95+
end
96+
end
97+
98+
-- Save values for next run
99+
redis.call('HSET', counters_table_name .. ':' .. KEYS[i], 'SAI_QUEUE_STAT_PACKETS_last', packets)
100+
redis.call('HSET', counters_table_name .. ':' .. KEYS[i], 'PFC_WD_DETECTION_TIME_LEFT', time_left)
101+
-- When a storm was just published, the port's PFC "*_last" values are left
-- untouched; otherwise they are refreshed with the current readings.
if is_deadlock == false then
102+
redis.call('HSET', counters_table_name .. ':' .. port_id, pfc_rx_pkt_key .. '_last', pfc_rx_packets)
103+
-- NOTE(review): this HDEL is immediately followed by an HSET of the same field.
redis.call('HDEL', counters_table_name .. ':' .. port_id, pfc_duration_key .. '_last')
104+
redis.call('HSET', counters_table_name .. ':' .. port_id, pfc_duration_key .. '_last', pfc_duration)
105+
end
106+
end
107+
end
108+
end
109+
end
110+
111+
-- Always the empty table created at the top of the script.
return rets

0 commit comments

Comments
 (0)