Skip to content

Commit 8f62c92

Browse files
shlomibittonraphaelt-nvidia
authored andcommitted
[flex-counters] Delay flex counters stats init for faster boot time (sonic-net#1646)
What I did Update flex counters DB with counters stats only when counters are enabled. As long as the polling counters are not enabled, flex counters information will stored internally on PortsOrch. Why I did it Creating flex counters objects on the DB will trigger 'SYNCD' to access the HW for query statistics capabilities. This HW access takes time and will be better to finish boot before doing this (mainly for fast-reboot but good to have in general). The flex counters are not crucial at boot time, we can delay it to the end of the boot process. How I verified it Reboot a switch and observer the flex counters DB populated after counters are enabled.
1 parent 41082b3 commit 8f62c92

9 files changed

+241
-17
lines changed

orchagent/flexcounterorch.cpp

+37-3
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,11 @@ extern IntfsOrch *gIntfsOrch;
1616
extern BufferOrch *gBufferOrch;
1717

1818
#define BUFFER_POOL_WATERMARK_KEY "BUFFER_POOL_WATERMARK"
19+
#define PORT_KEY "PORT"
20+
#define PORT_BUFFER_DROP_KEY "PORT_BUFFER_DROP"
21+
#define QUEUE_KEY "QUEUE"
22+
#define PG_WATERMARK_KEY "PG_WATERMARK"
23+
#define RIF_KEY "RIF"
1924

2025
unordered_map<string, string> flexCounterGroupMap =
2126
{
@@ -87,6 +92,16 @@ void FlexCounterOrch::doTask(Consumer &consumer)
8792
}
8893
else if(field == FLEX_COUNTER_STATUS_FIELD)
8994
{
95+
if((key == PORT_KEY) && (value == "enable"))
96+
{
97+
gPortsOrch->generatePortCounterMap();
98+
m_port_counter_enabled = true;
99+
}
100+
if((key == PORT_BUFFER_DROP_KEY) && (value == "enable"))
101+
{
102+
gPortsOrch->generatePortBufferDropCounterMap();
103+
m_port_buffer_drop_counter_enabled = true;
104+
}
90105
// Currently, the counters are disabled for polling by default
91106
// The queue maps will be generated as soon as counters are enabled for polling
92107
// Counter polling is enabled by pushing the COUNTER_ID_LIST/ATTR_ID_LIST, which contains
@@ -101,9 +116,18 @@ void FlexCounterOrch::doTask(Consumer &consumer)
101116
// This can be because generateQueueMap() installs a fundamental list of queue stats
102117
// that need to be polled. So my doubt here is if queue watermark stats shall be piggybacked
103118
// into the same function as they may not be counted as fundamental
104-
gPortsOrch->generateQueueMap();
105-
gPortsOrch->generatePriorityGroupMap();
106-
gIntfsOrch->generateInterfaceMap();
119+
if((key == QUEUE_KEY) && (value == "enable"))
120+
{
121+
gPortsOrch->generateQueueMap();
122+
}
123+
if((key == PG_WATERMARK_KEY) && (value == "enable"))
124+
{
125+
gPortsOrch->generatePriorityGroupMap();
126+
}
127+
if((key == RIF_KEY) && (value == "enable"))
128+
{
129+
gIntfsOrch->generateInterfaceMap();
130+
}
107131
// Install COUNTER_ID_LIST/ATTR_ID_LIST only when hearing buffer pool watermark enable event
108132
if ((key == BUFFER_POOL_WATERMARK_KEY) && (value == "enable"))
109133
{
@@ -124,3 +148,13 @@ void FlexCounterOrch::doTask(Consumer &consumer)
124148
consumer.m_toSync.erase(it++);
125149
}
126150
}
151+
152+
bool FlexCounterOrch::getPortCountersState()
153+
{
154+
return m_port_counter_enabled;
155+
}
156+
157+
bool FlexCounterOrch::getPortBufferDropCountersState()
158+
{
159+
return m_port_buffer_drop_counter_enabled;
160+
}

orchagent/flexcounterorch.h

+4
Original file line numberDiff line numberDiff line change
@@ -15,10 +15,14 @@ class FlexCounterOrch: public Orch
1515
void doTask(Consumer &consumer);
1616
FlexCounterOrch(swss::DBConnector *db, std::vector<std::string> &tableNames);
1717
virtual ~FlexCounterOrch(void);
18+
bool getPortCountersState();
19+
bool getPortBufferDropCountersState();
1820

1921
private:
2022
std::shared_ptr<swss::DBConnector> m_flexCounterDb = nullptr;
2123
std::shared_ptr<swss::ProducerTable> m_flexCounterGroupTable = nullptr;
24+
bool m_port_counter_enabled = false;
25+
bool m_port_buffer_drop_counter_enabled = false;
2226
};
2327

2428
#endif

orchagent/orchdaemon.cpp

+5-1
Original file line numberDiff line numberDiff line change
@@ -339,7 +339,11 @@ bool OrchDaemon::init()
339339
CFG_FLEX_COUNTER_TABLE_NAME
340340
};
341341

342-
m_orchList.push_back(new FlexCounterOrch(m_configDb, flex_counter_tables));
342+
auto* flexCounterOrch = new FlexCounterOrch(m_configDb, flex_counter_tables);
343+
m_orchList.push_back(flexCounterOrch);
344+
345+
gDirectory.set(flexCounterOrch);
346+
gDirectory.set(gPortsOrch);
343347

344348
vector<string> pfc_wd_tables = {
345349
CFG_PFC_WD_TABLE_NAME

orchagent/portsorch.cpp

+70-13
Original file line numberDiff line numberDiff line change
@@ -233,9 +233,9 @@ static char* hostif_vlan_tag[] = {
233233
*/
234234
PortsOrch::PortsOrch(DBConnector *db, vector<table_name_with_pri_t> &tableNames, DBConnector *chassisAppDb) :
235235
Orch(db, tableNames),
236-
port_stat_manager(PORT_STAT_COUNTER_FLEX_COUNTER_GROUP, StatsMode::READ, PORT_STAT_FLEX_COUNTER_POLLING_INTERVAL_MS, true),
237-
port_buffer_drop_stat_manager(PORT_BUFFER_DROP_STAT_FLEX_COUNTER_GROUP, StatsMode::READ, PORT_BUFFER_DROP_STAT_POLLING_INTERVAL_MS, true),
238-
queue_stat_manager(QUEUE_STAT_COUNTER_FLEX_COUNTER_GROUP, StatsMode::READ, QUEUE_STAT_FLEX_COUNTER_POLLING_INTERVAL_MS, true)
236+
port_stat_manager(PORT_STAT_COUNTER_FLEX_COUNTER_GROUP, StatsMode::READ, PORT_STAT_FLEX_COUNTER_POLLING_INTERVAL_MS, false),
237+
port_buffer_drop_stat_manager(PORT_BUFFER_DROP_STAT_FLEX_COUNTER_GROUP, StatsMode::READ, PORT_BUFFER_DROP_STAT_POLLING_INTERVAL_MS, false),
238+
queue_stat_manager(QUEUE_STAT_COUNTER_FLEX_COUNTER_GROUP, StatsMode::READ, QUEUE_STAT_FLEX_COUNTER_POLLING_INTERVAL_MS, false)
239239
{
240240
SWSS_LOG_ENTER();
241241

@@ -2131,19 +2131,21 @@ bool PortsOrch::initPort(const string &alias, const int index, const set<int> &l
21312131
vector<FieldValueTuple> fields;
21322132
fields.push_back(tuple);
21332133
m_counterTable->set("", fields);
2134+
21342135
// Install a flex counter for this port to track stats
2135-
std::unordered_set<std::string> counter_stats;
2136-
for (const auto& it: port_stat_ids)
2136+
auto flex_counters_orch = gDirectory.get<FlexCounterOrch*>();
2137+
/* Delay installing the counters if they are yet enabled
2138+
If they are enabled, install the counters immediately */
2139+
if (flex_counters_orch->getPortCountersState())
21372140
{
2138-
counter_stats.emplace(sai_serialize_port_stat(it));
2141+
auto port_counter_stats = generateCounterStats(PORT_STAT_COUNTER_FLEX_COUNTER_GROUP);
2142+
port_stat_manager.setCounterIdList(p.m_port_id, CounterType::PORT, port_counter_stats);
21392143
}
2140-
port_stat_manager.setCounterIdList(p.m_port_id, CounterType::PORT, counter_stats);
2141-
std::unordered_set<std::string> port_buffer_drop_stats;
2142-
for (const auto& it: port_buffer_drop_stat_ids)
2144+
if (flex_counters_orch->getPortBufferDropCountersState())
21432145
{
2144-
port_buffer_drop_stats.emplace(sai_serialize_port_stat(it));
2146+
auto port_buffer_drop_stats = generateCounterStats(PORT_BUFFER_DROP_STAT_FLEX_COUNTER_GROUP);
2147+
port_buffer_drop_stat_manager.setCounterIdList(p.m_port_id, CounterType::PORT, port_buffer_drop_stats);
21452148
}
2146-
port_buffer_drop_stat_manager.setCounterIdList(p.m_port_id, CounterType::PORT, port_buffer_drop_stats);
21472149

21482150
PortUpdate update = { p, true };
21492151
notify(SUBJECT_TYPE_PORT_CHANGE, static_cast<void *>(&update));
@@ -2176,8 +2178,11 @@ void PortsOrch::deInitPort(string alias, sai_object_id_t port_id)
21762178
p.m_port_id = port_id;
21772179

21782180
/* remove port from flex_counter_table for updating counters */
2179-
port_stat_manager.clearCounterIdList(p.m_port_id);
2180-
2181+
auto flex_counters_orch = gDirectory.get<FlexCounterOrch*>();
2182+
if ((flex_counters_orch->getPortCountersState()))
2183+
{
2184+
port_stat_manager.clearCounterIdList(p.m_port_id);
2185+
}
21812186
/* remove port name map from counter table */
21822187
m_counter_db->hdel(COUNTERS_PORT_NAME_MAP, alias);
21832188

@@ -4699,6 +4704,38 @@ void PortsOrch::generatePriorityGroupMapPerPort(const Port& port)
46994704
CounterCheckOrch::getInstance().addPort(port);
47004705
}
47014706

4707+
void PortsOrch::generatePortCounterMap()
4708+
{
4709+
if (m_isPortCounterMapGenerated)
4710+
{
4711+
return;
4712+
}
4713+
4714+
auto port_counter_stats = generateCounterStats(PORT_STAT_COUNTER_FLEX_COUNTER_GROUP);
4715+
for (const auto& it: m_portList)
4716+
{
4717+
port_stat_manager.setCounterIdList(it.second.m_port_id, CounterType::PORT, port_counter_stats);
4718+
}
4719+
4720+
m_isPortCounterMapGenerated = true;
4721+
}
4722+
4723+
void PortsOrch::generatePortBufferDropCounterMap()
4724+
{
4725+
if (m_isPortBufferDropCounterMapGenerated)
4726+
{
4727+
return;
4728+
}
4729+
4730+
auto port_buffer_drop_stats = generateCounterStats(PORT_BUFFER_DROP_STAT_FLEX_COUNTER_GROUP);
4731+
for (const auto& it: m_portList)
4732+
{
4733+
port_buffer_drop_stat_manager.setCounterIdList(it.second.m_port_id, CounterType::PORT, port_buffer_drop_stats);
4734+
}
4735+
4736+
m_isPortBufferDropCounterMapGenerated = true;
4737+
}
4738+
47024739
void PortsOrch::doTask(NotificationConsumer &consumer)
47034740
{
47044741
SWSS_LOG_ENTER();
@@ -5565,6 +5602,26 @@ bool PortsOrch::setVoqInbandIntf(string &alias, string &type)
55655602
return true;
55665603
}
55675604

5605+
std::unordered_set<std::string> PortsOrch::generateCounterStats(const string& type)
5606+
{
5607+
std::unordered_set<std::string> counter_stats;
5608+
if (type == PORT_STAT_COUNTER_FLEX_COUNTER_GROUP)
5609+
{
5610+
for (const auto& it: port_stat_ids)
5611+
{
5612+
counter_stats.emplace(sai_serialize_port_stat(it));
5613+
}
5614+
}
5615+
else if (type == PORT_BUFFER_DROP_STAT_FLEX_COUNTER_GROUP)
5616+
{
5617+
for (const auto& it: port_buffer_drop_stat_ids)
5618+
{
5619+
counter_stats.emplace(sai_serialize_port_stat(it));
5620+
}
5621+
}
5622+
return counter_stats;
5623+
}
5624+
55685625
void PortsOrch::voqSyncAddLag (Port &lag)
55695626
{
55705627
int32_t switch_id = lag.m_system_lag_info.switch_id;

orchagent/portsorch.h

+9
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
#include "flex_counter_manager.h"
1313
#include "gearboxutils.h"
1414
#include "saihelper.h"
15+
#include "flexcounterorch.h"
1516
#include "lagid.h"
1617

1718

@@ -125,6 +126,8 @@ class PortsOrch : public Orch, public Subject
125126

126127
void generateQueueMap();
127128
void generatePriorityGroupMap();
129+
void generatePortCounterMap();
130+
void generatePortBufferDropCounterMap();
128131

129132
void refreshPortStatus();
130133
bool removeAclTableGroup(const Port &p);
@@ -283,6 +286,9 @@ class PortsOrch : public Orch, public Subject
283286
bool m_isPriorityGroupMapGenerated = false;
284287
void generatePriorityGroupMapPerPort(const Port& port);
285288

289+
bool m_isPortCounterMapGenerated = false;
290+
bool m_isPortBufferDropCounterMapGenerated = false;
291+
286292
bool setPortAutoNeg(sai_object_id_t id, int an);
287293
bool setPortFecMode(sai_object_id_t id, int fec);
288294

@@ -307,6 +313,9 @@ class PortsOrch : public Orch, public Subject
307313
sai_uint32_t m_systemPortCount;
308314
bool getSystemPorts();
309315
bool addSystemPorts();
316+
317+
std::unordered_set<std::string> generateCounterStats(const string& type);
318+
310319
unique_ptr<Table> m_tableVoqSystemLagTable;
311320
unique_ptr<Table> m_tableVoqSystemLagMemberTable;
312321
void voqSyncAddLag(Port &lag);

tests/mock_tests/mock_orchagent_main.h

+3
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@
1515
#include "vxlanorch.h"
1616
#include "policerorch.h"
1717
#include "fgnhgorch.h"
18+
#include "flexcounterorch.h"
19+
#include "directory.h"
1820

1921
extern int gBatchSize;
2022
extern bool gSwssRecord;
@@ -42,6 +44,7 @@ extern FdbOrch *gFdbOrch;
4244
extern MirrorOrch *gMirrorOrch;
4345
extern BufferOrch *gBufferOrch;
4446
extern VRFOrch *gVrfOrch;
47+
extern Directory<Orch*> gDirectory;
4548

4649
extern sai_acl_api_t *sai_acl_api;
4750
extern sai_switch_api_t *sai_switch_api;

tests/mock_tests/portsorch_ut.cpp

+9
Original file line numberDiff line numberDiff line change
@@ -145,7 +145,16 @@ namespace portsorch_test
145145
};
146146

147147
ASSERT_EQ(gPortsOrch, nullptr);
148+
149+
vector<string> flex_counter_tables = {
150+
CFG_FLEX_COUNTER_TABLE_NAME
151+
};
152+
153+
auto* flexCounterOrch = new FlexCounterOrch(m_config_db.get(), flex_counter_tables);
154+
gDirectory.set(flexCounterOrch);
155+
148156
gPortsOrch = new PortsOrch(m_app_db.get(), ports_tables, m_chassis_app_db.get());
157+
149158
vector<string> buffer_tables = { APP_BUFFER_POOL_TABLE_NAME,
150159
APP_BUFFER_PROFILE_TABLE_NAME,
151160
APP_BUFFER_QUEUE_TABLE_NAME,

tests/test_flex_counters.py

+102
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
import time
2+
import pytest
3+
4+
# Counter keys on ConfigDB
5+
PORT_KEY = "PORT"
6+
QUEUE_KEY = "QUEUE"
7+
RIF_KEY = "RIF"
8+
BUFFER_POOL_WATERMARK_KEY = "BUFFER_POOL_WATERMARK"
9+
PORT_BUFFER_DROP_KEY = "PORT_BUFFER_DROP"
10+
PG_WATERMARK_KEY = "PG_WATERMARK"
11+
12+
# Counter stats on FlexCountersDB
13+
PORT_STAT = "PORT_STAT_COUNTER"
14+
QUEUE_STAT = "QUEUE_STAT_COUNTER"
15+
RIF_STAT = "RIF_STAT_COUNTER"
16+
BUFFER_POOL_WATERMARK_STAT = "BUFFER_POOL_WATERMARK_STAT_COUNTER"
17+
PORT_BUFFER_DROP_STAT = "PORT_BUFFER_DROP_STAT"
18+
PG_WATERMARK_STAT = "PG_WATERMARK_STAT_COUNTER"
19+
20+
# Counter maps on CountersDB
21+
PORT_MAP = "COUNTERS_PORT_NAME_MAP"
22+
QUEUE_MAP = "COUNTERS_QUEUE_NAME_MAP"
23+
RIF_MAP = "COUNTERS_RIF_NAME_MAP"
24+
BUFFER_POOL_WATERMARK_MAP = "COUNTERS_BUFFER_POOL_NAME_MAP"
25+
PORT_BUFFER_DROP_MAP = "COUNTERS_PORT_NAME_MAP"
26+
PG_WATERMARK_MAP = "COUNTERS_PG_NAME_MAP"
27+
28+
NUMBER_OF_RETRIES = 10
29+
30+
counter_type_dict = {"port_counter":[PORT_KEY, PORT_STAT, PORT_MAP],
31+
"queue_counter":[QUEUE_KEY, QUEUE_STAT, QUEUE_MAP],
32+
"rif_counter":[RIF_KEY, RIF_STAT, RIF_MAP],
33+
"buffer_pool_watermark_counter":[BUFFER_POOL_WATERMARK_KEY, BUFFER_POOL_WATERMARK_STAT, BUFFER_POOL_WATERMARK_MAP],
34+
"port_buffer_drop_counter":[PORT_BUFFER_DROP_KEY, PORT_BUFFER_DROP_STAT, PORT_BUFFER_DROP_MAP],
35+
"pg_watermark_counter":[PG_WATERMARK_KEY, PG_WATERMARK_STAT, PG_WATERMARK_MAP]}
36+
37+
class TestFlexCounters(object):
38+
39+
def setup_dbs(self, dvs):
40+
self.config_db = dvs.get_config_db()
41+
self.flex_db = dvs.get_flex_db()
42+
self.counters_db = dvs.get_counters_db()
43+
44+
def wait_for_table(self, table):
45+
for retry in range(NUMBER_OF_RETRIES):
46+
counters_keys = self.counters_db.db_connection.hgetall(table)
47+
if len(counters_keys) > 0:
48+
return
49+
else:
50+
time.sleep(1)
51+
52+
assert False, str(table) + " not created in Counters DB"
53+
54+
def wait_for_id_list(self, stat, name, oid):
55+
for retry in range(NUMBER_OF_RETRIES):
56+
id_list = self.flex_db.db_connection.hgetall("FLEX_COUNTER_TABLE:" + stat + ":" + oid).items()
57+
if len(id_list) > 0:
58+
return
59+
else:
60+
time.sleep(1)
61+
62+
assert False, "No ID list for counter " + str(name)
63+
64+
def verify_no_flex_counters_tables(self, counter_stat):
65+
counters_stat_keys = self.flex_db.get_keys("FLEX_COUNTER_TABLE:" + counter_stat)
66+
assert len(counters_stat_keys) == 0, "FLEX_COUNTER_TABLE:" + str(counter_stat) + " tables exist before enabling the flex counter group"
67+
68+
def verify_flex_counters_populated(self, map, stat):
69+
counters_keys = self.counters_db.db_connection.hgetall(map)
70+
for counter_entry in counters_keys.items():
71+
name = counter_entry[0]
72+
oid = counter_entry[1]
73+
self.wait_for_id_list(stat, name, oid)
74+
75+
def enable_flex_counter_group(self, group, map):
76+
group_stats_entry = {"FLEX_COUNTER_STATUS": "enable"}
77+
self.config_db.create_entry("FLEX_COUNTER_TABLE", group, group_stats_entry)
78+
self.wait_for_table(map)
79+
80+
@pytest.mark.parametrize("counter_type", counter_type_dict.keys())
81+
def test_flex_counters(self, dvs, counter_type):
82+
"""
83+
The test will check there are no flex counters tables on FlexCounter DB when the counters are disabled.
84+
After enabling each counter group, the test will check the flow of creating flex counters tables on FlexCounter DB.
85+
For some counter types the MAPS on COUNTERS DB will be created as well after enabling the counter group, this will be also verified on this test.
86+
"""
87+
self.setup_dbs(dvs)
88+
counter_key = counter_type_dict[counter_type][0]
89+
counter_stat = counter_type_dict[counter_type][1]
90+
counter_map = counter_type_dict[counter_type][2]
91+
92+
self.verify_no_flex_counters_tables(counter_stat)
93+
94+
if counter_type == "rif_counter":
95+
self.config_db.db_connection.hset('INTERFACE|Ethernet0', "NULL", "NULL")
96+
self.config_db.db_connection.hset('INTERFACE|Ethernet0|192.168.0.1/24', "NULL", "NULL")
97+
98+
self.enable_flex_counter_group(counter_key, counter_map)
99+
self.verify_flex_counters_populated(counter_map, counter_stat)
100+
101+
if counter_type == "rif_counter":
102+
self.config_db.db_connection.hdel('INTERFACE|Ethernet0|192.168.0.1/24', "NULL")

0 commit comments

Comments
 (0)