|
-- PFC watchdog storm detection, executed inside Redis (EVAL / EVALSHA).
--
-- KEYS    - queue IDs
-- ARGV[1] - counters db index
-- ARGV[2] - counters table name
-- ARGV[3] - poll time interval
--
-- Storm / restore decisions are reported by PUBLISHing on the
-- 'PFC_WD_ACTION' channel. The returned table is always empty; it is kept
-- so the reply shape seen by the caller does not change.

local counters_db = ARGV[1]
local counters_table_name = ARGV[2]
local poll_time = tonumber(ARGV[3])

local rets = {}

redis.call('SELECT', counters_db)

-- Evaluate a single queue: read its counters, decide whether it is (still)
-- in a PFC storm, publish 'storm' / 'restore' when appropriate, and persist
-- the "*_last" values and the remaining detection time for the next poll.
-- Returns nothing; a queue that lacks required data is skipped silently.
local function check_queue(queue_id)
    local queue_key = counters_table_name .. ':' .. queue_id

    local pfc_wd_status = redis.call('HGET', queue_key, 'PFC_WD_STATUS')
    local pfc_wd_action = redis.call('HGET', queue_key, 'PFC_WD_ACTION')
    local big_red_switch_mode = redis.call('HGET', queue_key, 'BIG_RED_SWITCH_MODE')

    -- Detection only runs when big-red-switch mode is not set and the queue
    -- is either operational or configured with the 'alert' action.
    if big_red_switch_mode then
        return
    end
    if pfc_wd_status ~= 'operational' and pfc_wd_action ~= 'alert' then
        return
    end

    local detection_time = redis.call('HGET', queue_key, 'PFC_WD_DETECTION_TIME')
    if not detection_time then
        return
    end
    detection_time = tonumber(detection_time)

    -- First poll for this queue: start from the full detection time.
    local time_left = redis.call('HGET', queue_key, 'PFC_WD_DETECTION_TIME_LEFT')
    if time_left then
        time_left = tonumber(time_left)
    else
        time_left = detection_time
    end

    local queue_index = redis.call('HGET', 'COUNTERS_QUEUE_INDEX_MAP', queue_id)
    local port_id = redis.call('HGET', 'COUNTERS_QUEUE_PORT_MAP', queue_id)
    -- A missing hash field comes back as `false` in Redis Lua. Concatenating
    -- it would raise an error and abort the script for every remaining queue,
    -- so a queue absent from either map is skipped instead.
    if not queue_index or not port_id then
        return
    end

    local port_key = counters_table_name .. ':' .. port_id
    local pfc_rx_pkt_key = 'SAI_PORT_STAT_PFC_' .. queue_index .. '_RX_PKTS'
    local pfc_duration_key = 'SAI_PORT_STAT_PFC_' .. queue_index .. '_RX_PAUSE_DURATION'

    -- Get all counters
    local occupancy_bytes = redis.call('HGET', queue_key, 'SAI_QUEUE_STAT_CURR_OCCUPANCY_BYTES')
    local packets = redis.call('HGET', queue_key, 'SAI_QUEUE_STAT_PACKETS')
    local pfc_rx_packets = redis.call('HGET', port_key, pfc_rx_pkt_key)
    local pfc_duration = redis.call('HGET', port_key, pfc_duration_key)

    if not (occupancy_bytes and packets and pfc_rx_packets and pfc_duration) then
        return
    end
    occupancy_bytes = tonumber(occupancy_bytes)
    packets = tonumber(packets)
    pfc_rx_packets = tonumber(pfc_rx_packets)
    pfc_duration = tonumber(pfc_duration)

    local packets_last = redis.call('HGET', queue_key, 'SAI_QUEUE_STAT_PACKETS_last')
    local pfc_rx_packets_last = redis.call('HGET', port_key, pfc_rx_pkt_key .. '_last')
    local pfc_duration_last = redis.call('HGET', port_key, pfc_duration_key .. '_last')
    -- Debug hook (active): a DEBUG_STORM field equal to "enabled" on the
    -- queue forces the storm branch below regardless of the counters.
    local debug_storm = redis.call('HGET', queue_key, 'DEBUG_STORM')

    local is_deadlock = false

    -- If this is not a first run, then we have last values available
    if packets_last and pfc_rx_packets_last and pfc_duration_last then
        packets_last = tonumber(packets_last)
        pfc_rx_packets_last = tonumber(pfc_rx_packets_last)
        pfc_duration_last = tonumber(pfc_duration_last)

        -- Check actual condition of queue being in PFC storm:
        --   * queue holds data, sent nothing, and PFC frames kept arriving, or
        --   * the debug hook is enabled, or
        --   * queue is empty, PFC frames kept arriving, and the pause
        --     duration grew by more than 80% of the poll interval.
        if (occupancy_bytes > 0 and packets - packets_last == 0 and pfc_rx_packets - pfc_rx_packets_last > 0) or
            (debug_storm == "enabled") or
            (occupancy_bytes == 0 and pfc_rx_packets - pfc_rx_packets_last > 0 and (pfc_duration - pfc_duration_last) > poll_time * 0.8) then
            if time_left <= poll_time then
                -- Detection time exhausted: report the storm and re-arm.
                redis.call('PUBLISH', 'PFC_WD_ACTION', '["' .. queue_id .. '","storm"]')
                is_deadlock = true
                time_left = detection_time
            else
                time_left = time_left - poll_time
            end
        else
            if pfc_wd_action == 'alert' and pfc_wd_status ~= 'operational' then
                redis.call('PUBLISH', 'PFC_WD_ACTION', '["' .. queue_id .. '","restore"]')
            end
            time_left = detection_time
        end
    end

    -- Save values for next run
    redis.call('HSET', queue_key, 'SAI_QUEUE_STAT_PACKETS_last', packets)
    redis.call('HSET', queue_key, 'PFC_WD_DETECTION_TIME_LEFT', time_left)
    -- The port-level "*_last" baselines are left untouched on the poll that
    -- reports a storm.
    if not is_deadlock then
        redis.call('HSET', port_key, pfc_rx_pkt_key .. '_last', pfc_rx_packets)
        -- NOTE(review): the HDEL looks redundant with the HSET that follows
        -- (same field); kept to preserve the exact command sequence.
        redis.call('HDEL', port_key, pfc_duration_key .. '_last')
        redis.call('HSET', port_key, pfc_duration_key .. '_last', pfc_duration)
    end
end

-- Iterate through each queue (last key first, as before)
for i = #KEYS, 1, -1 do
    check_queue(KEYS[i])
end

return rets
0 commit comments