Skip to content

cisco-8000 pfc-wd support #1748

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Oct 8, 2021
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions orchagent/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,9 @@ dist_swss_DATA = \
pfc_detect_broadcom.lua \
pfc_detect_barefoot.lua \
pfc_detect_nephos.lua \
pfc_detect_cisco-8000.lua \
pfc_restore.lua \
pfc_restore_cisco-8000.lua \
port_rates.lua \
watermark_queue.lua \
watermark_pg.lua \
Expand Down
1 change: 1 addition & 0 deletions orchagent/orch.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ const char state_db_key_delimiter = '|';
#define VS_PLATFORM_SUBSTRING "vs"
#define NPS_PLATFORM_SUBSTRING "nephos"
#define MRVL_PLATFORM_SUBSTRING "marvell"
#define CISCO_8000_PLATFORM_SUBSTRING "cisco-8000"

#define CONFIGDB_KEY_SEPARATOR "|"
#define DEFAULT_KEY_SEPARATOR ":"
Expand Down
21 changes: 21 additions & 0 deletions orchagent/orchdaemon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -520,6 +520,27 @@ bool OrchDaemon::init()
queueStatIds,
queueAttrIds,
PFC_WD_POLL_MSECS));
} else if (platform == CISCO_8000_PLATFORM_SUBSTRING)
{
static const vector<sai_port_stat_t> portStatIds;

static const vector<sai_queue_stat_t> queueStatIds =
{
SAI_QUEUE_STAT_PACKETS,
};

static const vector<sai_queue_attr_t> queueAttrIds =
{
SAI_QUEUE_ATTR_PAUSE_STATUS,
};

m_orchList.push_back(new PfcWdSwOrch<PfcWdSaiDlrInitHandler, PfcWdSaiDlrInitHandler>(
m_configDb,
pfc_wd_tables,
portStatIds,
queueStatIds,
queueAttrIds,
PFC_WD_POLL_MSECS));
}

m_orchList.push_back(&CounterCheckOrch::getInstance(m_configDb));
Expand Down
76 changes: 76 additions & 0 deletions orchagent/pfc_detect_cisco-8000.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
-- PFC watchdog storm-detection plugin (cisco-8000), run as a Redis Lua script.
--
-- KEYS    - queue IDs
-- ARGV[1] - counters db index
-- ARGV[2] - counters table name
-- ARGV[3] - poll time interval
--
-- For each queue whose watchdog is armed, the script counts down
-- PFC_WD_DETECTION_TIME_LEFT while the queue reports a pause status of 'true'
-- and publishes a "storm" event on the PFC_WD_ACTION channel once the
-- countdown expires. Results are delivered through PUBLISH only: the returned
-- table is never populated and is always empty.

local counters_db = ARGV[1]
local counters_table_name = ARGV[2]
local poll_time = tonumber(ARGV[3])

local rets = {}

redis.call('SELECT', counters_db)

-- Iterate through each queue (last to first)
for i = #KEYS, 1, -1 do
    local queue_id = KEYS[i]
    -- Hash holding both the counters and the watchdog state of this queue
    local queue_key = counters_table_name .. ':' .. queue_id

    local pfc_wd_status = redis.call('HGET', queue_key, 'PFC_WD_STATUS')
    local pfc_wd_action = redis.call('HGET', queue_key, 'PFC_WD_ACTION')
    local big_red_switch_mode = redis.call('HGET', queue_key, 'BIG_RED_SWITCH_MODE')

    -- Skip queues in big-red-switch mode; otherwise only look at queues that
    -- are operational or configured with the 'alert' action.
    if not big_red_switch_mode and (pfc_wd_status == 'operational' or pfc_wd_action == 'alert') then
        local detection_time = redis.call('HGET', queue_key, 'PFC_WD_DETECTION_TIME')
        if detection_time then
            detection_time = tonumber(detection_time)

            -- Resume the countdown saved by the previous run, or start a new one
            local time_left = redis.call('HGET', queue_key, 'PFC_WD_DETECTION_TIME_LEFT')
            if not time_left then
                time_left = detection_time
            else
                time_left = tonumber(time_left)
            end

            -- Get PFC status
            local packets = redis.call('HGET', queue_key, 'SAI_QUEUE_STAT_PACKETS')
            local queue_pause_status = redis.call('HGET', queue_key, 'SAI_QUEUE_ATTR_PAUSE_STATUS')

            -- Nothing is evaluated or saved until both values are present
            if packets and queue_pause_status then

                -- DEBUG HOOK (currently active): a DEBUG_STORM field equal to
                -- "enabled" makes the queue count as being in a storm.
                -- Comment out this read and the matching condition below to disable.
                local debug_storm = redis.call('HGET', queue_key, 'DEBUG_STORM')

                -- Check actual condition of queue being in PFC storm
                if (queue_pause_status == 'true')
                    -- DEBUG HOOK condition (see above)
                    or (debug_storm == "enabled")
                then
                    if time_left <= poll_time then
                        -- Countdown expired: report the storm and re-arm
                        redis.call('PUBLISH', 'PFC_WD_ACTION', '["' .. queue_id .. '","storm"]')
                        time_left = detection_time
                    else
                        time_left = time_left - poll_time
                    end
                else
                    -- Queue is not paused. With the 'alert' action, a queue that
                    -- is not operational is restored from this script.
                    if pfc_wd_action == 'alert' and pfc_wd_status ~= 'operational' then
                        redis.call('PUBLISH', 'PFC_WD_ACTION', '["' .. queue_id .. '","restore"]')
                    end
                    time_left = detection_time
                end

                -- Save values for next run
                redis.call('HSET', queue_key, 'PFC_WD_DETECTION_TIME_LEFT', time_left)
                redis.call('HSET', queue_key, 'SAI_QUEUE_ATTR_PAUSE_STATUS_last', queue_pause_status)
                redis.call('HSET', queue_key, 'SAI_QUEUE_STAT_PACKETS_last', packets)
            end
        end
    end
end

return rets
62 changes: 62 additions & 0 deletions orchagent/pfc_restore_cisco-8000.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
-- PFC watchdog restore plugin (cisco-8000), run as a Redis Lua script.
--
-- KEYS    - queue IDs
-- ARGV[1] - counters db index
-- ARGV[2] - counters table name
-- ARGV[3] - poll time interval
--
-- For each queue that is not operational (and not using the 'alert' action),
-- the script counts down PFC_WD_RESTORATION_TIME_LEFT while the queue reports
-- a pause status of 'false' and publishes a "restore" event on the
-- PFC_WD_ACTION channel once the countdown has run out. Results are delivered
-- through PUBLISH only: the returned table is never populated and is always
-- empty.

local counters_db = ARGV[1]
local counters_table_name = ARGV[2]
local poll_time = tonumber(ARGV[3])

local rets = {}

redis.call('SELECT', counters_db)

-- Iterate through each queue (last to first)
for i = #KEYS, 1, -1 do
    local queue_id = KEYS[i]
    -- Hash holding both the counters and the watchdog state of this queue
    local queue_key = counters_table_name .. ':' .. queue_id

    local pfc_wd_status = redis.call('HGET', queue_key, 'PFC_WD_STATUS')
    local restoration_time = redis.call('HGET', queue_key, 'PFC_WD_RESTORATION_TIME')
    local pfc_wd_action = redis.call('HGET', queue_key, 'PFC_WD_ACTION')
    local big_red_switch_mode = redis.call('HGET', queue_key, 'BIG_RED_SWITCH_MODE')

    -- Only non-operational, non-'alert' queues with a configured restoration
    -- time are handled here; queues in big-red-switch mode are skipped.
    if not big_red_switch_mode and pfc_wd_status ~= 'operational' and pfc_wd_action ~= 'alert' and restoration_time and restoration_time ~= '' then
        restoration_time = tonumber(restoration_time)

        -- Resume the countdown saved by the previous run, or start a new one
        local time_left = redis.call('HGET', queue_key, 'PFC_WD_RESTORATION_TIME_LEFT')
        if not time_left then
            time_left = restoration_time
        else
            time_left = tonumber(time_left)
        end

        -- DEBUG HOOK (currently active): a DEBUG_STORM field equal to
        -- "enabled" keeps the queue from being restored.
        -- Comment out this read and the matching condition below to disable.
        local debug_storm = redis.call('HGET', queue_key, 'DEBUG_STORM')

        -- Check actual condition of queue being restored from PFC storm
        local queue_pause_status = redis.call('HGET', queue_key, 'SAI_QUEUE_ATTR_PAUSE_STATUS')

        if (queue_pause_status == 'false')
            -- DEBUG HOOK condition (see above)
            and (debug_storm ~= "enabled")
        then
            -- NOTE(review): pfc_detect_cisco-8000.lua fires at
            -- time_left <= poll_time, while this threshold is 0, which costs
            -- one extra poll interval before restoring - confirm intended.
            if time_left <= 0 then
                redis.call('PUBLISH', 'PFC_WD_ACTION', '["' .. queue_id .. '","restore"]')
                time_left = restoration_time
            else
                time_left = time_left - poll_time
            end
        else
            -- Still paused (or debug storm forced): restart the countdown
            time_left = restoration_time
        end

        -- Save values for next run
        redis.call('HSET', queue_key, 'PFC_WD_RESTORATION_TIME_LEFT', time_left)
    end
end

return rets
76 changes: 76 additions & 0 deletions orchagent/pfcactionhandler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,82 @@ void PfcWdActionHandler::updateWdCounters(const string& queueIdStr, const PfcWdQ
m_countersTable->set(queueIdStr, resultFvValues);
}

// Builds the handler and immediately asks SAI to start PFC deadlock recovery
// on the queue by setting SAI_QUEUE_ATTR_PFC_DLR_INIT to true. A failure is
// logged; construction still completes.
PfcWdSaiDlrInitHandler::PfcWdSaiDlrInitHandler(sai_object_id_t port, sai_object_id_t queue,
        uint8_t queueId, shared_ptr<Table> countersTable):
    PfcWdActionHandler(port, queue, queueId, countersTable)
{
    SWSS_LOG_ENTER();

    // DLR init = true starts PFC deadlock recovery
    sai_attribute_t dlrInitAttr;
    dlrInitAttr.id = SAI_QUEUE_ATTR_PFC_DLR_INIT;
    dlrInitAttr.value.booldata = true;

    const sai_status_t rc = sai_queue_api->set_queue_attribute(queue, &dlrInitAttr);
    if (rc == SAI_STATUS_SUCCESS)
    {
        return;
    }

    SWSS_LOG_ERROR("Failed to set PFC DLR INIT on port 0x%" PRIx64 " queue 0x%" PRIx64
            " queueId %d : %d",
            port, queue, queueId, rc);
}

// Stops PFC deadlock recovery on the queue by setting
// SAI_QUEUE_ATTR_PFC_DLR_INIT back to false. A failure is only logged.
PfcWdSaiDlrInitHandler::~PfcWdSaiDlrInitHandler(void)
{
    SWSS_LOG_ENTER();

    const sai_object_id_t port = getPort();
    const sai_object_id_t queue = getQueue();
    const uint8_t queueId = getQueueId();

    // DLR init = false stops PFC deadlock recovery
    sai_attribute_t dlrInitAttr;
    dlrInitAttr.id = SAI_QUEUE_ATTR_PFC_DLR_INIT;
    dlrInitAttr.value.booldata = false;

    const sai_status_t rc = sai_queue_api->set_queue_attribute(queue, &dlrInitAttr);
    if (rc == SAI_STATUS_SUCCESS)
    {
        return;
    }

    SWSS_LOG_ERROR("Failed to clear PFC DLR INIT on port 0x%" PRIx64 " queue 0x%" PRIx64
            " queueId %d : %d", port, queue, queueId, rc);
}

// Reads the queue's packet and dropped-packet counters through SAI and
// stores them as the TX counters of `counters`; the RX counters are set to 0.
// Returns false (after logging) when the SAI stats query fails, true otherwise.
bool PfcWdSaiDlrInitHandler::getHwCounters(PfcWdHwStats& counters)
{
    SWSS_LOG_ENTER();

    // Order matters: results are read back by position below.
    static const vector<sai_stat_id_t> statIds =
    {
        SAI_QUEUE_STAT_PACKETS,
        SAI_QUEUE_STAT_DROPPED_PACKETS,
    };

    // One zero-initialised slot per requested stat
    vector<uint64_t> statValues(statIds.size());

    const sai_status_t rc = sai_queue_api->get_queue_stats(
            getQueue(),
            static_cast<uint32_t>(statIds.size()),
            statIds.data(),
            statValues.data());

    if (rc != SAI_STATUS_SUCCESS)
    {
        SWSS_LOG_ERROR("Failed to fetch queue 0x%" PRIx64 " stats: %d", getQueue(), rc);
        return false;
    }

    counters.txPkt = statValues[0];      // SAI_QUEUE_STAT_PACKETS
    counters.txDropPkt = statValues[1];  // SAI_QUEUE_STAT_DROPPED_PACKETS
    counters.rxPkt = 0;
    counters.rxDropPkt = 0;

    return true;
}

PfcWdAclHandler::PfcWdAclHandler(sai_object_id_t port, sai_object_id_t queue,
uint8_t queueId, shared_ptr<Table> countersTable):
PfcWdLossyHandler(port, queue, queueId, countersTable)
Expand Down
11 changes: 11 additions & 0 deletions orchagent/pfcactionhandler.h
Original file line number Diff line number Diff line change
Expand Up @@ -163,4 +163,15 @@ class PfcWdZeroBufferHandler: public PfcWdLossyHandler
sai_object_id_t m_originalPgBufferProfile = SAI_NULL_OBJECT_ID;
};

// PFC queue that implements drop action by draining queue via SAI
// attribute SAI_QUEUE_ATTR_PFC_DLR_INIT.
//
// The attribute is tied to the object's lifetime: the constructor sets it to
// true on the queue and the destructor sets it back to false.
class PfcWdSaiDlrInitHandler: public PfcWdActionHandler
{
public:
// Sets SAI_QUEUE_ATTR_PFC_DLR_INIT to true on `queue`; a failure is logged.
PfcWdSaiDlrInitHandler(sai_object_id_t port, sai_object_id_t queue,
uint8_t queueId, shared_ptr<Table> countersTable);
// Sets SAI_QUEUE_ATTR_PFC_DLR_INIT to false on the queue; a failure is logged.
virtual ~PfcWdSaiDlrInitHandler(void);
// Fills the TX packet / TX dropped-packet counters from the queue stats and
// zeroes the RX counters. Returns false if the stats could not be fetched.
virtual bool getHwCounters(PfcWdHwStats& counters);
};

#endif
8 changes: 7 additions & 1 deletion orchagent/pfcwdorch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -666,7 +666,12 @@ PfcWdSwOrch<DropHandler, ForwardHandler>::PfcWdSwOrch(

string detectSha, restoreSha;
string detectPluginName = "pfc_detect_" + platform + ".lua";
string restorePluginName = "pfc_restore.lua";
string restorePluginName;
if (platform == CISCO_8000_PLATFORM_SUBSTRING) {
restorePluginName = "pfc_restore_" + platform + ".lua";
} else {
restorePluginName = "pfc_restore.lua";
}

try
{
Expand Down Expand Up @@ -1056,3 +1061,4 @@ bool PfcWdSwOrch<DropHandler, ForwardHandler>::bake()
// Trick to keep member functions in a separate file
// Explicit instantiations: one per <DropHandler, ForwardHandler> pair, so the
// template's member functions defined in this file are generated for each pair.
template class PfcWdSwOrch<PfcWdZeroBufferHandler, PfcWdLossyHandler>;
template class PfcWdSwOrch<PfcWdAclHandler, PfcWdLossyHandler>;
template class PfcWdSwOrch<PfcWdSaiDlrInitHandler, PfcWdSaiDlrInitHandler>;