Skip to content

Commit 9b463ca

Browse files
authored
[chassis][voq] Add fabric capacity monitoring cmds (#3255)
What I did Added fabric capacity monitoring related commands and the tests. To config the warning threshold ~# config fabric Usage: config fabric [OPTIONS] COMMAND [ARGS]... FABRIC-related configuration tasks Options: -h, -?, --help Show this message and exit. Commands: monitor FABRIC MONITOR configuration tasks port FABRIC PORT configuration tasks ~# config fabric monitor Usage: config fabric monitor [OPTIONS] COMMAND [ARGS]... FABRIC MONITOR configuration tasks Options: -h, -?, --help Show this message and exit. Commands: capacity FABRIC MONITOR CAPACITY configuration tasks ~# config fabric monitor capacity Usage: config fabric monitor capacity [OPTIONS] COMMAND [ARGS]... FABRIC MONITOR CAPACITY configuration tasks Options: -?, -h, --help Show this message and exit. Commands: threshold FABRIC CAPACITY MONITOR THRESHOLD configuration tasks ~# config fabric monitor capacity threshold 90 ~# To show the capacity : ~# show fabric Usage: show fabric [OPTIONS] COMMAND [ARGS]... Show fabric information Options: -h, -?, --help Show this message and exit. Commands: counters Show fabric port counters isolation Show fabric isolation status monitor Show fabric monitor reachability Show fabric reachability ~# show fabric monitor Usage: show fabric monitor [OPTIONS] COMMAND [ARGS]... Show fabric monitor Options: -?, -h, --help Show this message and exit. 
Commands: capacity Show fabric capacity ~# show fabric monitor capacity Monitored fabric capacity threshold: 90% ASIC Operating Total # % Last Event Last Time Links of Links ------ ----------- ---------- ------- ------------ ----------- asic0 32 144 22.2222 Lower 0:19:17 ago asic1 28 144 19.4444 Lower 0:19:17 ago asic2 32 144 22.2222 Lower 0:19:17 ago asic3 28 144 19.4444 Lower 0:19:17 ago asic4 32 144 22.2222 Lower 0:19:17 ago asic5 28 144 19.4444 Lower 0:19:17 ago asic6 32 144 22.2222 Lower 0:19:19 ago asic7 28 144 19.4444 Lower 0:19:16 ago asic8 32 144 22.2222 Lower 0:19:16 ago asic9 28 144 19.4444 Lower 0:19:20 ago asic10 31 144 21.5278 Lower 0:19:16 ago asic11 28 144 19.4444 Lower 0:19:17 ago ~# ~# config fabric monitor capacity threshold 10 ~# show fabric monitor capacity Monitored fabric capacity threshold: 10% ASIC Operating Total # % Last Event Last Time Links of Links ------ ----------- ---------- ------- ------------ ----------- asic0 32 144 22.2222 Higher 0:00:03 ago asic1 28 144 19.4444 Higher 0:00:03 ago asic2 32 144 22.2222 Higher 0:00:03 ago asic3 28 144 19.4444 Higher 0:00:03 ago asic4 32 144 22.2222 Higher 0:00:03 ago asic5 28 144 19.4444 Higher 0:00:03 ago asic6 32 144 22.2222 Higher 0:00:05 ago asic7 28 144 19.4444 Higher 0:00:02 ago asic8 32 144 22.2222 Higher 0:00:02 ago asic9 28 144 19.4444 Higher 0:00:06 ago asic10 31 144 21.5278 Higher 0:00:02 ago asic11
1 parent df94636 commit 9b463ca

File tree

9 files changed

+244
-14
lines changed

9 files changed

+244
-14
lines changed

config/fabric.py

+42
Original file line numberDiff line numberDiff line change
@@ -315,3 +315,45 @@ def recovery(pollcount, namespace):
315315
{"monPollThreshRecovery": pollcount})
316316

317317

318+
#
319+
# 'config fabric monitor ...'
320+
#
321+
# Click group registered on `fabric` under the command name 'monitor'.
# The docstring below is the help text click prints for this group.
@fabric.group(name='monitor', cls=clicommon.AbbreviationGroup)
def capacity_monitor():
    """FABRIC MONITOR configuration tasks"""
325+
326+
#
327+
# 'config fabric monitor capacity...'
328+
#
329+
# Click sub-group of `capacity_monitor`; it performs no work of its own.
# The docstring below is the help text click prints for this group.
@capacity_monitor.group(
    cls=clicommon.AbbreviationGroup,
)
def capacity():
    """FABRIC MONITOR CAPACITY configuration tasks"""
333+
334+
#
335+
# 'config fabric monitor capacity threshold <capacityThreshold>'
336+
#
337+
@capacity.command()
@click.argument('capacitythreshold', metavar='<capacityThreshold>', required=True, type=int)
def threshold(capacitythreshold):
    """FABRIC CAPACITY MONITOR THRESHOLD configuration tasks"""
    # NOTE: the docstring above is the click help text and is kept verbatim.
    #
    # Writes `monCapacityThreshWarn` into FABRIC_MONITOR|FABRIC_MONITOR_DATA
    # of the CONFIG_DB of every namespace returned by
    # multi_asic.get_namespace_list().
    #
    # capacitythreshold: integer threshold, accepted range 5..250 inclusive.
    # Fails (via ctx.fail) when the value is out of range or when any
    # namespace lacks the FABRIC_MONITOR_DATA entry.
    ctx = click.get_current_context()

    if not 5 <= capacitythreshold <= 250:
        ctx.fail("threshold must be in range 5...250")

    # Pass 1: connect to every namespace and make sure the configuration
    # data exists everywhere *before* writing anything, so that a missing
    # entry in a later namespace cannot leave earlier namespaces already
    # modified (partial configuration).
    config_dbs = []
    for namespace in multi_asic.get_namespace_list():
        config_db = ConfigDBConnector(use_unix_socket_path=True, namespace=namespace)
        config_db.connect()

        monitor_data = config_db.get_all(config_db.CONFIG_DB, "FABRIC_MONITOR|FABRIC_MONITOR_DATA")
        if not monitor_data:
            ctx.fail("Fabric monitor configuration data not present")

        config_dbs.append(config_db)

    # Pass 2: every namespace validated, now update the entries.
    for config_db in config_dbs:
        config_db.mod_entry("FABRIC_MONITOR", "FABRIC_MONITOR_DATA",
                            {"monCapacityThreshWarn": capacitythreshold})

scripts/fabricstat

+97-13
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,21 @@
11
#!/usr/bin/env python3
22

33
import argparse
4-
from collections import OrderedDict, namedtuple
4+
import click
55
import json
66
import os
77
import sys
8+
import utilities_common.multi_asic as multi_asic_util
89

9-
from utilities_common import constants
10-
from utilities_common.cli import json_serial, UserCache
11-
from utilities_common.netstat import format_number_with_comma, table_as_json, ns_diff, format_prate
10+
from collections import OrderedDict, namedtuple
11+
from datetime import datetime, timezone, timedelta
1212
from natsort import natsorted
13-
from tabulate import tabulate
1413
from sonic_py_common import multi_asic
1514
from swsscommon.swsscommon import APP_FABRIC_PORT_TABLE_NAME, COUNTERS_TABLE, COUNTERS_FABRIC_PORT_NAME_MAP, COUNTERS_FABRIC_QUEUE_NAME_MAP
16-
import utilities_common.multi_asic as multi_asic_util
15+
from tabulate import tabulate
16+
from utilities_common import constants
17+
from utilities_common.cli import json_serial, UserCache
18+
from utilities_common.netstat import format_number_with_comma, table_as_json, ns_diff, format_prate
1719

1820
# mock the redis for unit test purposes #
1921
try:
@@ -280,6 +282,62 @@ class FabricQueueStat(FabricStat):
280282
print(tabulate(table, queuestat_header, tablefmt='simple', stralign='right'))
281283
print()
282284

285+
class FabricCapacity(FabricStat):
    """Collect the fabric capacity table row for a single namespace.

    Results are not printed here: each call to capacity_print() appends to
    the caller-supplied ``table_cnt`` and ``threshold`` lists, so one pair of
    lists can accumulate rows from several namespaces.
    """

    def __init__(self, namespace, table_cnt, threshold):
        """Store the namespace and the shared output lists.

        namespace: namespace to query; a falsy value is displayed as "asic0".
        table_cnt: list that receives one row tuple per capacity_print() call.
        threshold: list that receives the "<value>%" warning threshold string
                   whenever the database provides one.
        """
        self.db = None
        self.namespace = namespace
        self.multi_asic = multi_asic_util.MultiAsic(constants.DISPLAY_ALL, namespace)
        self.table_cnt = table_cnt
        self.threshold = threshold

    @staticmethod
    def _format_elapsed(last_event_time):
        """Return the time elapsed since an epoch-seconds timestamp as '<H:MM:SS> ago'."""
        event_dt = datetime.fromtimestamp(int(last_event_time), timezone.utc)
        elapsed = datetime.now(timezone.utc) - event_dt
        # total_seconds() keeps whole days, whereas timedelta.seconds is only
        # the 0..86399 remainder and silently drops them. int() discards the
        # sub-second part; max() keeps a timestamp slightly in the future
        # (clock skew) from rendering as a negative duration.
        whole_seconds = max(0, int(elapsed.total_seconds()))
        return str(timedelta(seconds=whole_seconds)) + " ago"

    def capacity_print(self):
        """Read FABRIC_CAPACITY_DATA from STATE_DB and append this namespace's row.

        The appended row is (asic name, operating links, total links,
        percentage of operating links, last event, last event time).
        Missing fields fall back to 0, "None" and "Never" respectively.
        """
        # Connect to database
        self.db = multi_asic.connect_to_all_dbs_for_ns(self.namespace)
        # An absent table is treated like an empty one so that the defaults
        # below apply instead of failing on a membership test.
        fabric_capacity_data = self.db.get_all(
            self.db.STATE_DB, "FABRIC_CAPACITY_TABLE|FABRIC_CAPACITY_DATA") or {}

        total_fabric_links = int(fabric_capacity_data.get('number_of_links', 0))
        operational_fabric_links = int(fabric_capacity_data.get('operating_links', 0))
        last_event = fabric_capacity_data.get('last_event', "None")
        last_time = fabric_capacity_data.get('last_event_time', "Never")

        if "warning_threshold" in fabric_capacity_data:
            self.threshold.append(fabric_capacity_data['warning_threshold'] + "%")

        # Percentage of operational links out of all links; stays 0 when the
        # total is unknown to avoid dividing by zero.
        ratio = 0
        if total_fabric_links > 0:
            ratio = operational_fabric_links/total_fabric_links*100

        if last_time != "Never":
            last_time = self._format_elapsed(last_time)

        asic_name = self.namespace if self.namespace else "asic0"

        # Update the table to print
        self.table_cnt.append((asic_name, operational_fabric_links, total_fabric_links,
                               ratio, last_event, last_time))
340+
283341
class FabricReachability(FabricStat):
284342
def reachability_print(self):
285343
# Connect to database
@@ -355,6 +413,8 @@ Examples:
355413
fabricstat -p -n asic0 -e
356414
fabricstat -q
357415
fabricstat -q -n asic0
416+
fabricstat -c
417+
fabricstat -c -n asic0
358418
fabricstat -C
359419
fabricstat -D
360420
""")
@@ -363,13 +423,15 @@ Examples:
363423
parser.add_argument('-r','--reachability', action='store_true', help='Display reachability, otherwise port stat')
364424
parser.add_argument('-n','--namespace', default=None, help='Display fabric ports counters for specific namespace')
365425
parser.add_argument('-e', '--errors', action='store_true', help='Display errors')
426+
parser.add_argument('-c','--capacity',action='store_true', help='Display fabric capacity')
366427
parser.add_argument('-i','--isolation', action='store_true', help='Display fabric ports isolation status')
367428
parser.add_argument('-C','--clear', action='store_true', help='Copy & clear fabric counters')
368429
parser.add_argument('-D','--delete', action='store_true', help='Delete saved stats')
369430

370431
args = parser.parse_args()
371432
queue = args.queue
372433
reachability = args.reachability
434+
capacity_status = args.capacity
373435
isolation_status = args.isolation
374436
namespace = args.namespace
375437
errors_only = args.errors
@@ -410,14 +472,36 @@ Examples:
410472
else:
411473
stat.cnstat_print(cnstat_dict, errors_only)
412474

413-
if namespace is None:
414-
# All asics or all fabric asics
415-
multi_asic = multi_asic_util.MultiAsic()
416-
for ns in multi_asic.get_ns_list_based_on_options():
417-
nsStat(ns, errors_only)
475+
if capacity_status:
476+
# show fabric capacity command
477+
capacity_header = []
478+
table_cnt = []
479+
threshold = []
480+
capacity_header = ["ASIC", "Operating\nLinks", "Total #\nof Links", "%", "Last Event", "Last Time"]
481+
if namespace is None:
482+
# All asics or all fabric asics
483+
multi_asic = multi_asic_util.MultiAsic()
484+
for ns in multi_asic.get_ns_list_based_on_options():
485+
stat = FabricCapacity(ns, table_cnt, threshold)
486+
stat.capacity_print()
487+
else:
488+
# Asic with namespace
489+
stat = FabricCapacity(namespace, table_cnt, threshold)
490+
stat.capacity_print()
491+
492+
click.echo("Monitored fabric capacity threshold: {}".format(threshold[0]))
493+
click.echo()
494+
click.echo(tabulate(table_cnt, capacity_header, tablefmt='simple', stralign='right'))
418495
else:
419-
# Asic with namespace
420-
nsStat(namespace, errors_only)
496+
# other show fabric commands
497+
if namespace is None:
498+
# All asics or all fabric asics
499+
multi_asic = multi_asic_util.MultiAsic()
500+
for ns in multi_asic.get_ns_list_based_on_options():
501+
nsStat(ns, errors_only)
502+
else:
503+
# Asic with namespace
504+
nsStat(namespace, errors_only)
421505

422506
if __name__ == "__main__":
423507
main()

show/fabric.py

+17
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,23 @@ def counters():
1313
"""Show fabric port counters"""
1414
pass
1515

16+
# Click group registered on `fabric`; it only hosts subcommands.
# The docstring below is the help text click prints for this group.
@fabric.group(
    cls=clicommon.AliasedGroup,
)
def monitor():
    """Show fabric monitor"""
20+
21+
@monitor.group(invoke_without_command=True)
@multi_asic_util.multi_asic_click_option_namespace
@click.option('-e', '--errors', is_flag=True)
def capacity(namespace, errors):
    """Show fabric capacity"""
    # Build the fabricstat argument vector; the optional flags are appended
    # only when the corresponding option was supplied.
    argv = ['fabricstat', '-c']
    if namespace is not None:
        argv.extend(['-n', str(namespace)])
    if errors:
        argv.append('-e')
    clicommon.run_command(argv)
32+
1633
@fabric.group(invoke_without_command=True)
1734
@multi_asic_util.multi_asic_click_option_namespace
1835
@click.option('-e', '--errors', is_flag=True)

tests/config_fabric_test.py

+24
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,18 @@ def test_config_fabric_monitor_state(self, ctx):
118118
expect_result = 0
119119
assert operator.eq(result.exit_code, expect_result)
120120

121+
def test_config_capacity(self, ctx):
    # "config fabric monitor capacity threshold 90" is expected to exit
    # with status 0.
    accepted = self.basic_check("monitor", ["capacity", "threshold", "90"], ctx)
    assert operator.eq(accepted.exit_code, 0)

    # "config fabric monitor capacity threshold 3" is expected to report
    # the range message in its output.
    rejected = self.basic_check("monitor", ["capacity", "threshold", "3"], ctx)
    assert "threshold must be in range 5...250" in rejected.output
132+
121133
@classmethod
122134
def teardown_class(cls):
123135
print("TEARDOWN")
@@ -152,6 +164,18 @@ def test_multi_config_fabric_monitor_state(self, ctx):
152164
expect_result = 0
153165
assert operator.eq(result.exit_code, expect_result)
154166

167+
def test_config_capacity_multi(self, ctx):
    # "config fabric monitor capacity threshold 80" is expected to exit
    # with status 0.
    accepted = self.basic_check("monitor", ["capacity", "threshold", "80"], ctx)
    assert operator.eq(accepted.exit_code, 0)

    # "config fabric monitor capacity threshold 4" is expected to report
    # the range message in its output.
    rejected = self.basic_check("monitor", ["capacity", "threshold", "4"], ctx)
    assert "threshold must be in range 5...250" in rejected.output
178+
155179
@classmethod
156180
def teardown_class(cls):
157181
print("TEARDOWN_TEST")

tests/fabricstat_test.py

+33-1
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,25 @@
151151
7 0 93 up
152152
"""
153153

154+
multi_asic_fabric_capacity = """\
155+
Monitored fabric capacity threshold: 100%
156+
157+
ASIC Operating Total # % Last Event Last Time
158+
Links of Links
159+
------ ----------- ---------- ---- ------------ -----------
160+
asic0 5 8 62.5 None Never
161+
asic1 2 8 25 None Never
162+
"""
163+
164+
multi_asic_fabric_capacity_asic0 = """\
165+
Monitored fabric capacity threshold: 100%
166+
167+
ASIC Operating Total # % Last Event Last Time
168+
Links of Links
169+
------ ----------- ---------- ---- ------------ -----------
170+
asic0 5 8 62.5 None Never
171+
"""
172+
154173
multi_asic_fabric_isolation = """\
155174
156175
asic0
@@ -181,7 +200,6 @@
181200
7 0 0 0
182201
"""
183202

184-
185203
class TestFabricStat(object):
186204
@classmethod
187205
def setup_class(cls):
@@ -302,6 +320,20 @@ def test_multi_show_fabric_reachability_asic(self):
302320
assert return_code == 0
303321
assert result == multi_asic_fabric_reachability_asic0
304322

323+
def test_mutli_show_fabric_capacity(self):
    # Run 'fabricstat -c' with no namespace and compare the whole output
    # against the expected capacity table.
    outcome = get_result_and_return_code(['fabricstat', '-c'])
    status, output = outcome
    print("return_code: {}".format(status))
    print("result = {}".format(output))
    assert status == 0
    assert output == multi_asic_fabric_capacity
329+
330+
def test_multi_show_fabric_capacity_asic(self):
    # Run 'fabricstat -c -n asic0' and compare the whole output against the
    # expected single-ASIC capacity table.
    outcome = get_result_and_return_code(['fabricstat', '-c', '-n', 'asic0'])
    status, output = outcome
    print("return_code: {}".format(status))
    print("result = {}".format(output))
    assert status == 0
    assert output == multi_asic_fabric_capacity_asic0
336+
305337
def test_multi_show_fabric_isolation(self):
306338
return_code, result = get_result_and_return_code(['fabricstat', '-i'])
307339
print("return_code: {}".format(return_code))

tests/mock_tables/asic0/state_db.json

+10
Original file line numberDiff line numberDiff line change
@@ -304,5 +304,15 @@
304304
},
305305
"ACL_RULE_TABLE|DATAACL_5|RULE_1" : {
306306
"status": "Active"
307+
},
308+
"FABRIC_CAPACITY_TABLE|FABRIC_CAPACITY_DATA" : {
309+
"fabric_capacity": "221580",
310+
"missing_capacity": "132948",
311+
"operating_links": "5",
312+
"number_of_links": "8",
313+
"warning_threshold": "100"
314+
},
315+
"PORT_CAPACITY_TABLE|PORT_CAPACITY_DATA" : {
316+
"capacity": "80000"
307317
}
308318
}

tests/mock_tables/asic1/state_db.json

+10
Original file line numberDiff line numberDiff line change
@@ -270,5 +270,15 @@
270270
},
271271
"FABRIC_PORT_TABLE|PORT7" : {
272272
"STATUS": "down"
273+
},
274+
"FABRIC_CAPACITY_TABLE|FABRIC_CAPACITY_DATA" : {
275+
"fabric_capacity": "88632",
276+
"missing_capacity": "265896",
277+
"operating_links": "2",
278+
"number_of_links": "8",
279+
"warning_threshold": "100"
280+
},
281+
"PORT_CAPACITY_TABLE|PORT_CAPACITY_DATA" : {
282+
"capacity": "80000"
273283
}
274284
}

tests/mock_tables/config_db.json

+1
Original file line numberDiff line numberDiff line change
@@ -2703,6 +2703,7 @@
27032703
"direction": "RX"
27042704
},
27052705
"FABRIC_MONITOR|FABRIC_MONITOR_DATA": {
2706+
"monCapacityThreshWarn": "100",
27062707
"monErrThreshCrcCells": "1",
27072708
"monErrThreshRxCells": "61035156",
27082709
"monPollThreshIsolation": "1",

tests/mock_tables/state_db.json

+10
Original file line numberDiff line numberDiff line change
@@ -1671,5 +1671,15 @@
16711671
"minimum_current": "339",
16721672
"timestamp": "20230704 17:38:04",
16731673
"warning_status": "False"
1674+
},
1675+
"FABRIC_CAPACITY_TABLE|FABRIC_CAPACITY_DATA" : {
1676+
"fabric_capacity": "88632",
1677+
"missing_capacity": "265896",
1678+
"operating_links": "2",
1679+
"number_of_links": "8",
1680+
"warning_threshold": "100"
1681+
},
1682+
"PORT_CAPACITY_TABLE|PORT_CAPACITY_DATA" : {
1683+
"capacity": "80000"
16741684
}
16751685
}

0 commit comments

Comments
 (0)