Skip to content

Commit 31c9321

Browse files
authored
[chassis][voq]Collect counters for fabric links (sonic-net#1944)
Counters include port stats and queue stats. Currently, counters can only be collected on fabric asics. J2 fabric counter collection doesn't work yet. J2 fabric port counters fail to be collected because the logical port id for fabric links is set up to 512 while SAI supports at most 256 ports. J2 fabric queue counters are not supported by SAI at this moment (BCM confirmed). Signed-off-by: Maxime Lorrillere <[email protected]> Signed-off-by: Maxime Lorrillere <[email protected]>
1 parent df92fb7 commit 31c9321

9 files changed

+249
-38
lines changed

orchagent/fabricportsorch.cpp

+95-26
Original file line numberDiff line numberDiff line change
@@ -11,17 +11,18 @@
1111
#include "timer.h"
1212

1313
#define FABRIC_POLLING_INTERVAL_DEFAULT (30)
14+
#define FABRIC_PORT_PREFIX "PORT"
1415
#define FABRIC_PORT_ERROR 0
1516
#define FABRIC_PORT_SUCCESS 1
1617
#define FABRIC_PORT_STAT_COUNTER_FLEX_COUNTER_GROUP "FABRIC_PORT_STAT_COUNTER"
1718
#define FABRIC_PORT_STAT_FLEX_COUNTER_POLLING_INTERVAL_MS 10000
1819
#define FABRIC_QUEUE_STAT_COUNTER_FLEX_COUNTER_GROUP "FABRIC_QUEUE_STAT_COUNTER"
1920
#define FABRIC_QUEUE_STAT_FLEX_COUNTER_POLLING_INTERVAL_MS 100000
20-
#define FABRIC_PORT_TABLE "FABRIC_PORT_TABLE"
2121

2222
extern sai_object_id_t gSwitchId;
2323
extern sai_switch_api_t *sai_switch_api;
2424
extern sai_port_api_t *sai_port_api;
25+
extern sai_queue_api_t *sai_queue_api;
2526

2627
const vector<sai_port_stat_t> port_stat_ids =
2728
{
@@ -42,7 +43,8 @@ static const vector<sai_queue_stat_t> queue_stat_ids =
4243
SAI_QUEUE_STAT_CURR_OCCUPANCY_LEVEL,
4344
};
4445

45-
FabricPortsOrch::FabricPortsOrch(DBConnector *appl_db, vector<table_name_with_pri_t> &tableNames) :
46+
FabricPortsOrch::FabricPortsOrch(DBConnector *appl_db, vector<table_name_with_pri_t> &tableNames,
47+
bool fabricPortStatEnabled, bool fabricQueueStatEnabled) :
4648
Orch(appl_db, tableNames),
4749
port_stat_manager(FABRIC_PORT_STAT_COUNTER_FLEX_COUNTER_GROUP, StatsMode::READ,
4850
FABRIC_PORT_STAT_FLEX_COUNTER_POLLING_INTERVAL_MS, true),
@@ -55,14 +57,17 @@ FabricPortsOrch::FabricPortsOrch(DBConnector *appl_db, vector<table_name_with_pr
5557
SWSS_LOG_NOTICE( "FabricPortsOrch constructor" );
5658

5759
m_state_db = shared_ptr<DBConnector>(new DBConnector("STATE_DB", 0));
58-
m_stateTable = unique_ptr<Table>(new Table(m_state_db.get(), FABRIC_PORT_TABLE));
60+
m_stateTable = unique_ptr<Table>(new Table(m_state_db.get(), APP_FABRIC_PORT_TABLE_NAME));
5961

6062
m_counter_db = shared_ptr<DBConnector>(new DBConnector("COUNTERS_DB", 0));
61-
m_laneQueueCounterTable = unique_ptr<Table>(new Table(m_counter_db.get(), COUNTERS_QUEUE_NAME_MAP));
62-
m_lanePortCounterTable = unique_ptr<Table>(new Table(m_counter_db.get(), COUNTERS_QUEUE_PORT_MAP));
63+
m_portNameQueueCounterTable = unique_ptr<Table>(new Table(m_counter_db.get(), COUNTERS_FABRIC_QUEUE_NAME_MAP));
64+
m_portNamePortCounterTable = unique_ptr<Table>(new Table(m_counter_db.get(), COUNTERS_FABRIC_PORT_NAME_MAP));
6365

6466
m_flex_db = shared_ptr<DBConnector>(new DBConnector("FLEX_COUNTER_DB", 0));
65-
m_flexCounterTable = unique_ptr<ProducerTable>(new ProducerTable(m_flex_db.get(), FABRIC_PORT_TABLE));
67+
m_flexCounterTable = unique_ptr<ProducerTable>(new ProducerTable(m_flex_db.get(), APP_FABRIC_PORT_TABLE_NAME));
68+
69+
m_fabricPortStatEnabled = fabricPortStatEnabled;
70+
m_fabricQueueStatEnabled = fabricQueueStatEnabled;
6671

6772
getFabricPortList();
6873

@@ -147,32 +152,96 @@ bool FabricPortsOrch::allPortsReady()
147152

148153
void FabricPortsOrch::generatePortStats()
149154
{
150-
// FIX_ME: This function installs flex counters for port stats
151-
// on fabric ports for fabric asics and voq asics (that connect
152-
// to fabric asics via fabric ports). These counters will be
153-
// installed in FLEX_COUNTER_DB, and queried by syncd and updated
154-
// to COUNTERS_DB.
155-
// However, currently BCM SAI doesn't update its code to query
156-
// port stats (metrics in list port_stat_ids) yet.
157-
// Also, BCM sets too low value for "Max logical port count" (256),
158-
// causing syncd to crash on voq asics that now include regular front
159-
// panel ports, fabric ports, and multiple logical ports.
160-
// So, this function will just do nothing for now, and we will readd
161-
// code to install port stats counters when BCM completely supports.
155+
if (!m_fabricPortStatEnabled) return;
156+
157+
SWSS_LOG_NOTICE("Generate fabric port stats");
158+
159+
vector<FieldValueTuple> portNamePortCounterMap;
160+
for (auto p : m_fabricLanePortMap)
161+
{
162+
int lane = p.first;
163+
sai_object_id_t port = p.second;
164+
165+
std::ostringstream portName;
166+
portName << FABRIC_PORT_PREFIX << lane;
167+
portNamePortCounterMap.emplace_back(portName.str(), sai_serialize_object_id(port));
168+
169+
// Install flex counters for port stats
170+
std::unordered_set<std::string> counter_stats;
171+
for (const auto& it: port_stat_ids)
172+
{
173+
counter_stats.emplace(sai_serialize_port_stat(it));
174+
}
175+
port_stat_manager.setCounterIdList(port, CounterType::PORT, counter_stats);
176+
}
177+
m_portNamePortCounterTable->set("", portNamePortCounterMap);
162178
}
163179

164180
void FabricPortsOrch::generateQueueStats()
165181
{
182+
if (!m_fabricQueueStatEnabled) return;
166183
if (m_isQueueStatsGenerated) return;
167184
if (!m_getFabricPortListDone) return;
168185

169-
// FIX_ME: Similar to generatePortStats(), generateQueueStats() installs
170-
// flex counters for queue stats on fabric ports for fabric asics and voq asics.
171-
// However, currently BCM SAI doesn't fully support queue stats query.
172-
// Query on queue type and index is not supported for fabric asics while
173-
// voq asics are not completely supported.
174-
// So, this function will just do nothing for now, and we will readd
175-
// code to install queue stats counters when BCM completely supports.
186+
SWSS_LOG_NOTICE("Generate queue map for fabric ports");
187+
188+
sai_status_t status;
189+
sai_attribute_t attr;
190+
191+
for (auto p : m_fabricLanePortMap)
192+
{
193+
int lane = p.first;
194+
sai_object_id_t port = p.second;
195+
196+
// Each serdes has some pipes (queues) for unicast and multicast.
197+
// But normally fabric serdes uses only one pipe.
198+
attr.id = SAI_PORT_ATTR_QOS_NUMBER_OF_QUEUES;
199+
status = sai_port_api->get_port_attribute(port, 1, &attr);
200+
if (status != SAI_STATUS_SUCCESS)
201+
{
202+
throw runtime_error("FabricPortsOrch get port queue number failure");
203+
}
204+
int num_queues = attr.value.u32;
205+
206+
if (num_queues > 0)
207+
{
208+
vector<sai_object_id_t> m_queue_ids;
209+
m_queue_ids.resize(num_queues);
210+
211+
attr.id = SAI_PORT_ATTR_QOS_QUEUE_LIST;
212+
attr.value.objlist.count = (uint32_t) num_queues;
213+
attr.value.objlist.list = m_queue_ids.data();
214+
215+
status = sai_port_api->get_port_attribute(port, 1, &attr);
216+
if (status != SAI_STATUS_SUCCESS)
217+
{
218+
throw runtime_error("FabricPortsOrch get port queue list failure");
219+
}
220+
221+
// Maintain queue map and install flex counters for queue stats
222+
vector<FieldValueTuple> portNameQueueMap;
223+
224+
// Fabric serdes queue type is SAI_QUEUE_TYPE_FABRIC_TX. Since we always
225+
// maintain only one queue for fabric serdes, m_queue_ids size is 1.
226+
// And so, there is no need to query SAI_QUEUE_ATTR_TYPE and SAI_QUEUE_ATTR_INDEX
227+
// for queue. Actually, SAI does not support querying these attributes on fabric serdes.
228+
int queueIndex = 0;
229+
std::ostringstream portName;
230+
portName << FABRIC_PORT_PREFIX << lane << ":" << queueIndex;
231+
const auto queue = sai_serialize_object_id(m_queue_ids[queueIndex]);
232+
portNameQueueMap.emplace_back(portName.str(), queue);
233+
234+
// We collect queue counters like occupancy level
235+
std::unordered_set<string> counter_stats;
236+
for (const auto& it: queue_stat_ids)
237+
{
238+
counter_stats.emplace(sai_serialize_queue_stat(it));
239+
}
240+
queue_stat_manager.setCounterIdList(m_queue_ids[queueIndex], CounterType::QUEUE, counter_stats);
241+
242+
m_portNameQueueCounterTable->set("", portNameQueueMap);
243+
}
244+
}
176245

177246
m_isQueueStatsGenerated = true;
178247
}
@@ -199,7 +268,7 @@ void FabricPortsOrch::updateFabricPortState()
199268
int lane = p.first;
200269
sai_object_id_t port = p.second;
201270

202-
string key = "PORT" + to_string(lane);
271+
string key = FABRIC_PORT_PREFIX + to_string(lane);
203272
std::vector<FieldValueTuple> values;
204273
uint32_t remote_peer;
205274
uint32_t remote_port;

orchagent/fabricportsorch.h

+7-3
Original file line numberDiff line numberDiff line change
@@ -12,18 +12,22 @@
1212
class FabricPortsOrch : public Orch, public Subject
1313
{
1414
public:
15-
FabricPortsOrch(DBConnector *appl_db, vector<table_name_with_pri_t> &tableNames);
15+
FabricPortsOrch(DBConnector *appl_db, vector<table_name_with_pri_t> &tableNames,
16+
bool fabricPortStatEnabled=true, bool fabricQueueStatEnabled=true);
1617
bool allPortsReady();
1718
void generateQueueStats();
1819

1920
private:
21+
bool m_fabricPortStatEnabled;
22+
bool m_fabricQueueStatEnabled;
23+
2024
shared_ptr<DBConnector> m_state_db;
2125
shared_ptr<DBConnector> m_counter_db;
2226
shared_ptr<DBConnector> m_flex_db;
2327

2428
unique_ptr<Table> m_stateTable;
25-
unique_ptr<Table> m_laneQueueCounterTable;
26-
unique_ptr<Table> m_lanePortCounterTable;
29+
unique_ptr<Table> m_portNameQueueCounterTable;
30+
unique_ptr<Table> m_portNamePortCounterTable;
2731
unique_ptr<ProducerTable> m_flexCounterTable;
2832

2933
swss::SelectableTimer *m_timer = nullptr;

orchagent/main.cpp

+3
Original file line numberDiff line numberDiff line change
@@ -708,6 +708,9 @@ int main(int argc, char **argv)
708708
if (gMySwitchType == "voq")
709709
{
710710
orchDaemon->setFabricEnabled(true);
711+
// SAI doesn't fully support counters for non-fabric asics
712+
orchDaemon->setFabricPortStatEnabled(false);
713+
orchDaemon->setFabricQueueStatEnabled(false);
711714
}
712715
}
713716
else

orchagent/orchdaemon.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -459,7 +459,7 @@ bool OrchDaemon::init()
459459
vector<table_name_with_pri_t> fabric_port_tables = {
460460
// empty for now
461461
};
462-
gFabricPortsOrch = new FabricPortsOrch(m_applDb, fabric_port_tables);
462+
gFabricPortsOrch = new FabricPortsOrch(m_applDb, fabric_port_tables, m_fabricPortStatEnabled, m_fabricQueueStatEnabled);
463463
m_orchList.push_back(gFabricPortsOrch);
464464
}
465465

orchagent/orchdaemon.h

+10
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,14 @@ class OrchDaemon
6969
{
7070
m_fabricEnabled = enabled;
7171
}
72+
void setFabricPortStatEnabled(bool enabled)
73+
{
74+
m_fabricPortStatEnabled = enabled;
75+
}
76+
void setFabricQueueStatEnabled(bool enabled)
77+
{
78+
m_fabricQueueStatEnabled = enabled;
79+
}
7280
void logRotate();
7381
private:
7482
DBConnector *m_applDb;
@@ -77,6 +85,8 @@ class OrchDaemon
7785
DBConnector *m_chassisAppDb;
7886

7987
bool m_fabricEnabled = false;
88+
bool m_fabricPortStatEnabled = true;
89+
bool m_fabricQueueStatEnabled = true;
8090

8191
std::vector<Orch *> m_orchList;
8292
Select *m_select;

tests/conftest.py

+32-8
Original file line numberDiff line numberDiff line change
@@ -100,11 +100,12 @@ def random_string(size=4, chars=string.ascii_uppercase + string.digits):
100100

101101

102102
class AsicDbValidator(DVSDatabase):
103-
def __init__(self, db_id: int, connector: str):
103+
def __init__(self, db_id: int, connector: str, switch_type: str):
104104
DVSDatabase.__init__(self, db_id, connector)
105-
self._wait_for_asic_db_to_initialize()
106-
self._populate_default_asic_db_values()
107-
self._generate_oid_to_interface_mapping()
105+
if switch_type not in ['fabric']:
106+
self._wait_for_asic_db_to_initialize()
107+
self._populate_default_asic_db_values()
108+
self._generate_oid_to_interface_mapping()
108109

109110
def _wait_for_asic_db_to_initialize(self) -> None:
110111
"""Wait up to 30 seconds for the default fields to appear in ASIC DB."""
@@ -497,7 +498,9 @@ def _polling_function():
497498
wait_for_result(_polling_function, service_polling_config)
498499

499500
def init_asic_db_validator(self) -> None:
500-
self.asicdb = AsicDbValidator(self.ASIC_DB_ID, self.redis_sock)
501+
self.get_config_db()
502+
metadata = self.config_db.get_entry('DEVICE_METADATA|localhost', '')
503+
self.asicdb = AsicDbValidator(self.ASIC_DB_ID, self.redis_sock, metadata.get("switch_type"))
501504

502505
def init_appl_db_validator(self) -> None:
503506
self.appldb = ApplDbValidator(self.APPL_DB_ID, self.redis_sock)
@@ -526,11 +529,13 @@ def _polling_function():
526529
port_table_keys = app_db.get_keys("PORT_TABLE")
527530
return ("PortInitDone" in port_table_keys and "PortConfigDone" in port_table_keys, None)
528531

529-
wait_for_result(_polling_function, startup_polling_config)
532+
if metadata.get('switch_type') not in ['fabric']:
533+
wait_for_result(_polling_function, startup_polling_config)
530534

531535
# Verify that all ports have been created
532-
asic_db = self.get_asic_db()
533-
asic_db.wait_for_n_keys("ASIC_STATE:SAI_OBJECT_TYPE_PORT", num_ports + 1) # +1 CPU Port
536+
if metadata.get('switch_type') not in ['fabric']:
537+
asic_db = self.get_asic_db()
538+
asic_db.wait_for_n_keys("ASIC_STATE:SAI_OBJECT_TYPE_PORT", num_ports + 1) # +1 CPU Port
534539

535540
# Verify that fabric ports are monitored in STATE_DB
536541
if metadata.get('switch_type', 'npu') in ['voq', 'fabric']:
@@ -1802,6 +1807,25 @@ def dvs(request, manage_dvs) -> DockerVirtualSwitch:
18021807

18031808
return manage_dvs(log_path, dvs_env)
18041809

1810+
@pytest.yield_fixture(scope="module")
1811+
def vst(request):
1812+
vctns = request.config.getoption("--vctns")
1813+
topo = request.config.getoption("--topo")
1814+
forcedvs = request.config.getoption("--forcedvs")
1815+
keeptb = request.config.getoption("--keeptb")
1816+
imgname = request.config.getoption("--imgname")
1817+
max_cpu = request.config.getoption("--max_cpu")
1818+
log_path = vctns if vctns else request.module.__name__
1819+
dvs_env = getattr(request.module, "DVS_ENV", [])
1820+
if not topo:
1821+
# use the chassis supervisor topology as default
1822+
topo = "virtual_chassis/chassis_supervisor.json"
1823+
vct = DockerVirtualChassisTopology(vctns, imgname, keeptb, dvs_env, log_path, max_cpu,
1824+
forcedvs, topo)
1825+
yield vct
1826+
vct.get_logs(request.module.__name__)
1827+
vct.destroy()
1828+
18051829
@pytest.fixture(scope="module")
18061830
def vct(request):
18071831
vctns = request.config.getoption("--vctns")

0 commit comments

Comments
 (0)