Skip to content

Commit fb06c32

Browse files
authored
[fabricportsorch] Add fabric support (sonic-net#1459)
This change adds support for fabric ASICs and for NPUs with fabric ports enabled. What I did: Create FabricOrchDaemon for fabric ASICs, instead of using OrchDaemon, which is used for NPUs. Create FabricPortsOrch to manage fabric ports. It collects information about port state, peer switch ID and peer lane (stored in STATE_DB), and sets up port stats and queue stats. In the future, it will also be used to enable/disable erroneous fabric ports. Fabric port and queue stats are set up to be collected via FlexCounters.
1 parent 73ffd5f commit fb06c32

10 files changed

+527
-68
lines changed

orchagent/Makefile.am

+1
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ orchagent_SOURCES = \
3636
neighorch.cpp \
3737
intfsorch.cpp \
3838
portsorch.cpp \
39+
fabricportsorch.cpp \
3940
fgnhgorch.cpp \
4041
copporch.cpp \
4142
tunneldecaporch.cpp \

orchagent/fabricportsorch.cpp

+268
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,268 @@
1+
#include "fabricportsorch.h"
2+
3+
#include <inttypes.h>
4+
#include <fstream>
5+
#include <sstream>
6+
#include <tuple>
7+
8+
#include "logger.h"
9+
#include "schema.h"
10+
#include "sai_serialize.h"
11+
#include "timer.h"
12+
13+
#define FABRIC_POLLING_INTERVAL_DEFAULT (30)
14+
#define FABRIC_PORT_ERROR 0
15+
#define FABRIC_PORT_SUCCESS 1
16+
#define FABRIC_PORT_STAT_COUNTER_FLEX_COUNTER_GROUP "FABRIC_PORT_STAT_COUNTER"
17+
#define FABRIC_PORT_STAT_FLEX_COUNTER_POLLING_INTERVAL_MS 10000
18+
#define FABRIC_QUEUE_STAT_COUNTER_FLEX_COUNTER_GROUP "FABRIC_QUEUE_STAT_COUNTER"
19+
#define FABRIC_QUEUE_STAT_FLEX_COUNTER_POLLING_INTERVAL_MS 100000
20+
#define FABRIC_PORT_TABLE "FABRIC_PORT_TABLE"
21+
22+
extern sai_object_id_t gSwitchId;
23+
extern sai_switch_api_t *sai_switch_api;
24+
extern sai_port_api_t *sai_port_api;
25+
26+
// Port-level SAI statistics listed for fabric ports.
// Declared `static` so it is visibly file-local, matching queue_stat_ids below.
// NOTE(review): not referenced by any function visible in this file at the
// moment (generatePortStats() is currently a no-op).
static const vector<sai_port_stat_t> port_stat_ids =
{
    SAI_PORT_STAT_IF_IN_OCTETS,
    SAI_PORT_STAT_IF_IN_ERRORS,
    SAI_PORT_STAT_IF_IN_FABRIC_DATA_UNITS,
    SAI_PORT_STAT_IF_IN_FEC_CORRECTABLE_FRAMES,
    SAI_PORT_STAT_IF_IN_FEC_NOT_CORRECTABLE_FRAMES,
    SAI_PORT_STAT_IF_IN_FEC_SYMBOL_ERRORS,
    SAI_PORT_STAT_IF_OUT_OCTETS,
    SAI_PORT_STAT_IF_OUT_FABRIC_DATA_UNITS,
};

// Queue-level SAI statistics listed for fabric port queues.
// NOTE(review): not referenced by any function visible in this file at the
// moment (generateQueueStats() currently installs nothing).
static const vector<sai_queue_stat_t> queue_stat_ids =
{
    SAI_QUEUE_STAT_WATERMARK_LEVEL,
    SAI_QUEUE_STAT_CURR_OCCUPANCY_BYTES,
    SAI_QUEUE_STAT_CURR_OCCUPANCY_LEVEL,
};
44+
45+
/**
 * Constructs the fabric ports orchestrator.
 *
 * Opens connections to STATE_DB, COUNTERS_DB and FLEX_COUNTER_DB, creates the
 * tables used by this orch, makes a first attempt at discovering the fabric
 * ports, and arms a periodic timer (FABRIC_POLLING_INTERVAL_DEFAULT seconds)
 * whose expiry is delivered to doTask(SelectableTimer&).
 *
 * @param appl_db     Database connector forwarded to the Orch base class.
 * @param tableNames  Table names (with priority) forwarded to the Orch base class.
 */
FabricPortsOrch::FabricPortsOrch(DBConnector *appl_db, vector<table_name_with_pri_t> &tableNames) :
        Orch(appl_db, tableNames),
        // Members are always initialized in declaration order; m_timer is declared
        // before the two stat managers in the class, so it is listed first here to
        // keep the initializer list consistent with the real order (-Wreorder).
        m_timer(new SelectableTimer(timespec { .tv_sec = FABRIC_POLLING_INTERVAL_DEFAULT, .tv_nsec = 0 })),
        port_stat_manager(FABRIC_PORT_STAT_COUNTER_FLEX_COUNTER_GROUP, StatsMode::READ,
                          FABRIC_PORT_STAT_FLEX_COUNTER_POLLING_INTERVAL_MS, true),
        queue_stat_manager(FABRIC_QUEUE_STAT_COUNTER_FLEX_COUNTER_GROUP, StatsMode::READ,
                           FABRIC_QUEUE_STAT_FLEX_COUNTER_POLLING_INTERVAL_MS, true)
{
    SWSS_LOG_ENTER();

    SWSS_LOG_NOTICE( "FabricPortsOrch constructor" );

    // Per-port state (status, remote peer, down counters) is published here.
    m_state_db = shared_ptr<DBConnector>(new DBConnector("STATE_DB", 0));
    m_stateTable = unique_ptr<Table>(new Table(m_state_db.get(), FABRIC_PORT_TABLE));

    m_counter_db = shared_ptr<DBConnector>(new DBConnector("COUNTERS_DB", 0));
    m_laneQueueCounterTable = unique_ptr<Table>(new Table(m_counter_db.get(), COUNTERS_QUEUE_NAME_MAP));
    m_lanePortCounterTable = unique_ptr<Table>(new Table(m_counter_db.get(), COUNTERS_QUEUE_PORT_MAP));

    m_flex_db = shared_ptr<DBConnector>(new DBConnector("FLEX_COUNTER_DB", 0));
    m_flexCounterTable = unique_ptr<ProducerTable>(new ProducerTable(m_flex_db.get(), FABRIC_PORT_TABLE));

    // Best-effort first attempt; the return value is intentionally ignored
    // because the timer handler retries until discovery succeeds.
    getFabricPortList();

    auto executor = new ExecutableTimer(m_timer, this, "FABRIC_POLL");
    Orch::addExecutor(executor);
    m_timer->start();
}
73+
74+
int FabricPortsOrch::getFabricPortList()
75+
{
76+
SWSS_LOG_ENTER();
77+
78+
if (m_getFabricPortListDone) {
79+
return FABRIC_PORT_SUCCESS;
80+
}
81+
82+
uint32_t i;
83+
sai_status_t status;
84+
sai_attribute_t attr;
85+
86+
attr.id = SAI_SWITCH_ATTR_NUMBER_OF_FABRIC_PORTS;
87+
status = sai_switch_api->get_switch_attribute(gSwitchId, 1, &attr);
88+
if (status != SAI_STATUS_SUCCESS)
89+
{
90+
SWSS_LOG_ERROR("Failed to get fabric port number, rv:%d", status);
91+
return FABRIC_PORT_ERROR;
92+
}
93+
m_fabricPortCount = attr.value.u32;
94+
SWSS_LOG_NOTICE("Get %d fabric ports", m_fabricPortCount);
95+
96+
vector<sai_object_id_t> fabric_port_list;
97+
fabric_port_list.resize(m_fabricPortCount);
98+
attr.id = SAI_SWITCH_ATTR_FABRIC_PORT_LIST;
99+
attr.value.objlist.count = (uint32_t)fabric_port_list.size();
100+
attr.value.objlist.list = fabric_port_list.data();
101+
status = sai_switch_api->get_switch_attribute(gSwitchId, 1, &attr);
102+
if (status != SAI_STATUS_SUCCESS)
103+
{
104+
throw runtime_error("FabricPortsOrch get port list failure");
105+
}
106+
107+
for (i = 0; i < m_fabricPortCount; i++)
108+
{
109+
sai_uint32_t lanes[1] = { 0 };
110+
attr.id = SAI_PORT_ATTR_HW_LANE_LIST;
111+
attr.value.u32list.count = 1;
112+
attr.value.u32list.list = lanes;
113+
status = sai_port_api->get_port_attribute(fabric_port_list[i], 1, &attr);
114+
if (status != SAI_STATUS_SUCCESS)
115+
{
116+
throw runtime_error("FabricPortsOrch get port lane failure");
117+
}
118+
int lane = attr.value.u32list.list[0];
119+
m_fabricLanePortMap[lane] = fabric_port_list[i];
120+
}
121+
122+
generatePortStats();
123+
124+
m_getFabricPortListDone = true;
125+
126+
updateFabricPortState();
127+
128+
return FABRIC_PORT_SUCCESS;
129+
}
130+
131+
bool FabricPortsOrch::allPortsReady()
132+
{
133+
return m_getFabricPortListDone;
134+
}
135+
136+
/**
 * Placeholder for installing port stat flex counters on fabric ports.
 * Currently does nothing.
 */
void FabricPortsOrch::generatePortStats()
{
    // FIX_ME: The intent of this function is to install flex counters for
    // port stats on fabric ports, both for fabric asics and for voq asics
    // (which connect to fabric asics via fabric ports). Those counters would
    // be installed in FLEX_COUNTER_DB, queried by syncd, and written to
    // COUNTERS_DB.
    //
    // It is deliberately left empty for now because:
    //  - BCM SAI has not yet updated its code to query the port stats
    //    listed in port_stat_ids.
    //  - BCM sets too low a value for "Max logical port count" (256), which
    //    causes syncd to crash on voq asics that now include regular front
    //    panel ports, fabric ports, and multiple logical ports.
    //
    // The code that installs the port stats counters will be re-added once
    // BCM support is complete.
}
151+
152+
void FabricPortsOrch::generateQueueStats()
153+
{
154+
if (m_isQueueStatsGenerated) return;
155+
if (!m_getFabricPortListDone) return;
156+
157+
// FIX_ME: Similar to generatePortStats(), generateQueueStats() installs
158+
// flex counters for queue stats on fabric ports for fabric asics and voq asics.
159+
// However, currently BCM SAI doesn't fully support queue stats query.
160+
// Query on queue type and index is not supported for fabric asics while
161+
// voq asics are not completely supported.
162+
// So, this function will just do nothing for now, and we will readd
163+
// code to install queue stats counters when BCM completely supports.
164+
165+
m_isQueueStatsGenerated = true;
166+
}
167+
168+
void FabricPortsOrch::updateFabricPortState()
169+
{
170+
if (!m_getFabricPortListDone) return;
171+
172+
SWSS_LOG_ENTER();
173+
174+
sai_status_t status;
175+
sai_attribute_t attr;
176+
177+
time_t now;
178+
struct timespec time_now;
179+
if (clock_gettime(CLOCK_MONOTONIC, &time_now) < 0)
180+
{
181+
return;
182+
}
183+
now = time_now.tv_sec;
184+
185+
for (auto p : m_fabricLanePortMap)
186+
{
187+
int lane = p.first;
188+
sai_object_id_t port = p.second;
189+
190+
string key = "PORT" + to_string(lane);
191+
std::vector<FieldValueTuple> values;
192+
uint32_t remote_peer;
193+
uint32_t remote_port;
194+
195+
attr.id = SAI_PORT_ATTR_FABRIC_ATTACHED;
196+
status = sai_port_api->get_port_attribute(port, 1, &attr);
197+
if (status != SAI_STATUS_SUCCESS)
198+
{
199+
// Port may not be ready for query
200+
SWSS_LOG_ERROR("Failed to get fabric port (%d) status, rv:%d", lane, status);
201+
return;
202+
}
203+
204+
if (m_portStatus.find(lane) != m_portStatus.end() &&
205+
m_portStatus[lane] && !attr.value.booldata)
206+
{
207+
m_portDownCount[lane] ++;
208+
m_portDownSeenLastTime[lane] = now;
209+
}
210+
m_portStatus[lane] = attr.value.booldata;
211+
212+
if (m_portStatus[lane])
213+
{
214+
attr.id = SAI_PORT_ATTR_FABRIC_ATTACHED_SWITCH_ID;
215+
status = sai_port_api->get_port_attribute(port, 1, &attr);
216+
if (status != SAI_STATUS_SUCCESS)
217+
{
218+
throw runtime_error("FabricPortsOrch get remote id failure");
219+
}
220+
remote_peer = attr.value.u32;
221+
222+
attr.id = SAI_PORT_ATTR_FABRIC_ATTACHED_PORT_INDEX;
223+
status = sai_port_api->get_port_attribute(port, 1, &attr);
224+
if (status != SAI_STATUS_SUCCESS)
225+
{
226+
throw runtime_error("FabricPortsOrch get remote port index failure");
227+
}
228+
remote_port = attr.value.u32;
229+
}
230+
231+
values.emplace_back("STATUS", m_portStatus[lane] ? "up" : "down");
232+
if (m_portStatus[lane])
233+
{
234+
values.emplace_back("REMOTE_MOD", to_string(remote_peer));
235+
values.emplace_back("REMOTE_PORT", to_string(remote_port));
236+
}
237+
if (m_portDownCount[lane] > 0)
238+
{
239+
values.emplace_back("PORT_DOWN_COUNT", to_string(m_portDownCount[lane]));
240+
values.emplace_back("PORT_DOWN_SEEN_LAST_TIME",
241+
to_string(m_portDownSeenLastTime[lane]));
242+
}
243+
m_stateTable->set(key, values);
244+
}
245+
}
246+
247+
/**
 * Generic task hook; intentionally empty in this orch.
 */
void FabricPortsOrch::doTask()
{
    // Nothing to do.
}
250+
251+
/**
 * Consumer task hook; intentionally empty, the consumer is not read here.
 *
 * @param consumer  Unused.
 */
void FabricPortsOrch::doTask(Consumer &consumer)
{
    // Nothing to do.
}
254+
255+
/**
 * Timer task hook: retries fabric port discovery until it succeeds, then
 * refreshes the published fabric port state.
 *
 * @param timer  The timer that fired (not inspected).
 */
void FabricPortsOrch::doTask(swss::SelectableTimer &timer)
{
    SWSS_LOG_ENTER();

    if (!m_getFabricPortListDone)
    {
        getFabricPortList();

        // Still not discovered: try again on the next tick.
        if (!m_getFabricPortListDone)
        {
            return;
        }
    }

    updateFabricPortState();
}

orchagent/fabricportsorch.h

+51
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
#ifndef SWSS_FABRICPORTSORCH_H
2+
#define SWSS_FABRICPORTSORCH_H
3+
4+
#include <map>
5+
6+
#include "orch.h"
7+
#include "observer.h"
8+
#include "observer.h"
9+
#include "producertable.h"
10+
#include "flex_counter_manager.h"
11+
12+
class FabricPortsOrch : public Orch, public Subject
13+
{
14+
public:
15+
FabricPortsOrch(DBConnector *appl_db, vector<table_name_with_pri_t> &tableNames);
16+
bool allPortsReady();
17+
void generateQueueStats();
18+
19+
private:
20+
shared_ptr<DBConnector> m_state_db;
21+
shared_ptr<DBConnector> m_counter_db;
22+
shared_ptr<DBConnector> m_flex_db;
23+
24+
unique_ptr<Table> m_stateTable;
25+
unique_ptr<Table> m_laneQueueCounterTable;
26+
unique_ptr<Table> m_lanePortCounterTable;
27+
unique_ptr<ProducerTable> m_flexCounterTable;
28+
29+
swss::SelectableTimer *m_timer = nullptr;
30+
31+
FlexCounterManager port_stat_manager;
32+
FlexCounterManager queue_stat_manager;
33+
34+
sai_uint32_t m_fabricPortCount;
35+
map<int, sai_object_id_t> m_fabricLanePortMap;
36+
unordered_map<int, bool> m_portStatus;
37+
unordered_map<int, size_t> m_portDownCount;
38+
unordered_map<int, time_t> m_portDownSeenLastTime;
39+
40+
bool m_getFabricPortListDone = false;
41+
bool m_isQueueStatsGenerated = false;
42+
int getFabricPortList();
43+
void generatePortStats();
44+
void updateFabricPortState();
45+
46+
void doTask() override;
47+
void doTask(Consumer &consumer);
48+
void doTask(swss::SelectableTimer &timer);
49+
};
50+
51+
#endif /* SWSS_FABRICPORTSORCH_H */

orchagent/flexcounterorch.cpp

+26-6
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
#include <unordered_map>
22
#include "flexcounterorch.h"
33
#include "portsorch.h"
4+
#include "fabricportsorch.h"
45
#include "select.h"
56
#include "notifier.h"
67
#include "sai_serialize.h"
@@ -12,6 +13,7 @@
1213
extern sai_port_api_t *sai_port_api;
1314

1415
extern PortsOrch *gPortsOrch;
16+
extern FabricPortsOrch *gFabricPortsOrch;
1517
extern IntfsOrch *gIntfsOrch;
1618
extern BufferOrch *gBufferOrch;
1719

@@ -51,7 +53,12 @@ void FlexCounterOrch::doTask(Consumer &consumer)
5153
{
5254
SWSS_LOG_ENTER();
5355

54-
if (!gPortsOrch->allPortsReady())
56+
if (gPortsOrch && !gPortsOrch->allPortsReady())
57+
{
58+
return;
59+
}
60+
61+
if (gFabricPortsOrch && !gFabricPortsOrch->allPortsReady())
5562
{
5663
return;
5764
}
@@ -101,15 +108,28 @@ void FlexCounterOrch::doTask(Consumer &consumer)
101108
// This can be because generateQueueMap() installs a fundamental list of queue stats
102109
// that need to be polled. So my doubt here is if queue watermark stats shall be piggybacked
103110
// into the same function as they may not be counted as fundamental
104-
gPortsOrch->generateQueueMap();
105-
gPortsOrch->generatePriorityGroupMap();
106-
gIntfsOrch->generateInterfaceMap();
111+
if(gPortsOrch)
112+
{
113+
gPortsOrch->generateQueueMap();
114+
gPortsOrch->generatePriorityGroupMap();
115+
}
116+
if(gPortsOrch)
117+
{
118+
gPortsOrch->generatePriorityGroupMap();
119+
}
120+
if(gIntfsOrch)
121+
{
122+
gIntfsOrch->generateInterfaceMap();
123+
}
107124
// Install COUNTER_ID_LIST/ATTR_ID_LIST only when hearing buffer pool watermark enable event
108-
if ((key == BUFFER_POOL_WATERMARK_KEY) && (value == "enable"))
125+
if (gBufferOrch && (key == BUFFER_POOL_WATERMARK_KEY) && (value == "enable"))
109126
{
110127
gBufferOrch->generateBufferPoolWatermarkCounterIdList();
111128
}
112-
129+
if (gFabricPortsOrch)
130+
{
131+
gFabricPortsOrch->generateQueueStats();
132+
}
113133
vector<FieldValueTuple> fieldValues;
114134
fieldValues.emplace_back(FLEX_COUNTER_STATUS_FIELD, value);
115135
m_flexCounterGroupTable->set(flexCounterGroupMap[key], fieldValues);

0 commit comments

Comments
 (0)