Skip to content

Commit 79a21ce

Browse files
authored
Revert frr route check (sonic-net#2761)
* Revert "[route_check] remove check-frr_patch mock (sonic-net#2732)"
  This reverts commit f27dea0.
* Revert "[route_check] fix IPv6 address handling (sonic-net#2722)"
  This reverts commit ff68832.
* Revert "[route_check] implement a check for FRR routes not marked offloaded (sonic-net#2531)"
  This reverts commit 90d7015.
1 parent 824680e commit 79a21ce

File tree

4 files changed

+16
-248
lines changed

4 files changed

+16
-248
lines changed

scripts/route_check.py

+11-111
Original file line number | Diff line number | Diff line change
@@ -11,11 +11,11 @@
1111
How:
1212
NOTE: The flow from APPL-DB to ASIC-DB takes non zero milliseconds.
1313
1) Initiate subscribe for ASIC-DB updates.
14-
2) Read APPL-DB & ASIC-DB
14+
2) Read APPL-DB & ASIC-DB
1515
3) Get the diff.
16-
4) If any diff,
16+
4) If any diff,
1717
4.1) Collect subscribe messages for a second
18-
4.2) check diff against the subscribe messages
18+
4.2) check diff against the subscribe messages
1919
5) Rule out local interfaces & default routes
2020
6) If still outstanding diffs, report failure.
2121
@@ -29,7 +29,7 @@
2929
down to ensure failure.
3030
Analyze the reported failures to match expected.
3131
You may use the exit code to verify the result as success or not.
32-
32+
3333
3434
3535
"""
@@ -45,9 +45,7 @@
4545
import time
4646
import signal
4747
import traceback
48-
import subprocess
4948

50-
from ipaddress import ip_network
5149
from swsscommon import swsscommon
5250
from utilities_common import chassis
5351

@@ -73,9 +71,6 @@
7371

7472
PRINT_MSG_LEN_MAX = 1000
7573

76-
FRR_CHECK_RETRIES = 3
77-
FRR_WAIT_TIME = 15
78-
7974
class Level(Enum):
8075
ERR = 'ERR'
8176
INFO = 'INFO'
@@ -146,7 +141,7 @@ def add_prefix(ip):
146141
ip = ip + PREFIX_SEPARATOR + "32"
147142
else:
148143
ip = ip + PREFIX_SEPARATOR + "128"
149-
return str(ip_network(ip))
144+
return ip
150145

151146

152147
def add_prefix_ifnot(ip):
@@ -155,7 +150,7 @@ def add_prefix_ifnot(ip):
155150
:param ip: IP to add prefix as string.
156151
:return ip with prefix
157152
"""
158-
return str(ip_network(ip)) if ip.find(PREFIX_SEPARATOR) != -1 else add_prefix(ip)
153+
return ip if ip.find(PREFIX_SEPARATOR) != -1 else add_prefix(ip)
159154

160155

161156
def is_local(ip):
@@ -298,7 +293,7 @@ def get_routes():
298293

299294
def get_route_entries():
300295
"""
301-
helper to read present route entries from ASIC-DB and
296+
helper to read present route entries from ASIC-DB and
302297
as well initiate selector for ASIC-DB:ASIC-state updates.
303298
:return (selector, subscriber, <list of sorted routes>)
304299
"""
@@ -314,39 +309,14 @@ def get_route_entries():
314309
res, e = checkout_rt_entry(k)
315310
if res:
316311
rt.append(e)
317-
312+
318313
print_message(syslog.LOG_DEBUG, json.dumps({"ASIC_ROUTE_ENTRY": sorted(rt)}, indent=4))
319314

320315
selector = swsscommon.Select()
321316
selector.addSelectable(subs)
322317
return (selector, subs, sorted(rt))
323318

324319

325-
def is_suppress_fib_pending_enabled():
326-
"""
327-
Returns True if FIB suppression is enabled, False otherwise
328-
"""
329-
cfg_db = swsscommon.ConfigDBConnector()
330-
cfg_db.connect()
331-
332-
state = cfg_db.get_entry('DEVICE_METADATA', 'localhost').get('suppress-fib-pending')
333-
334-
return state == 'enabled'
335-
336-
337-
def get_frr_routes():
338-
"""
339-
Read routes from zebra through CLI command
340-
:return frr routes dictionary
341-
"""
342-
343-
output = subprocess.check_output('show ip route json', shell=True)
344-
routes = json.loads(output)
345-
output = subprocess.check_output('show ipv6 route json', shell=True)
346-
routes.update(json.loads(output))
347-
return routes
348-
349-
350320
def get_interfaces():
351321
"""
352322
helper to read interface table from APPL-DB.
@@ -384,7 +354,7 @@ def filter_out_local_interfaces(keys):
384354

385355
chassis_local_intfs = chassis.get_chassis_local_interfaces()
386356
local_if_lst.update(set(chassis_local_intfs))
387-
357+
388358
db = swsscommon.DBConnector(APPL_DB_NAME, 0)
389359
tbl = swsscommon.Table(db, 'ROUTE_TABLE')
390360

@@ -523,61 +493,6 @@ def filter_out_standalone_tunnel_routes(routes):
523493
return updated_routes
524494

525495

526-
def check_frr_pending_routes():
527-
"""
528-
Check FRR routes for offload flag presence by executing "show ip route json"
529-
Returns a list of routes that have no offload flag.
530-
"""
531-
532-
missed_rt = []
533-
534-
retries = FRR_CHECK_RETRIES
535-
for i in range(retries):
536-
missed_rt = []
537-
frr_routes = get_frr_routes()
538-
539-
for _, entries in frr_routes.items():
540-
for entry in entries:
541-
if entry['protocol'] != 'bgp':
542-
continue
543-
544-
# TODO: Also handle VRF routes. Currently this script does not check for VRF routes so it would be incorrect for us
545-
# to assume they are installed in ASIC_DB, so we don't handle them.
546-
if entry['vrfName'] != 'default':
547-
continue
548-
549-
if not entry.get('offloaded', False):
550-
missed_rt.append(entry)
551-
552-
if not missed_rt:
553-
break
554-
555-
time.sleep(FRR_WAIT_TIME)
556-
557-
return missed_rt
558-
559-
560-
def mitigate_installed_not_offloaded_frr_routes(missed_frr_rt, rt_appl):
561-
"""
562-
Mitigate installed but not offloaded FRR routes.
563-
564-
In case route exists in APPL_DB, this function will manually send a notification to fpmsyncd
565-
to trigger the flow that sends offload flag to zebra.
566-
567-
It is designed to mitigate a problem when orchagent fails to send notification about installed route to fpmsyncd
568-
or fpmsyncd not being able to read the notification or in case zebra fails to receive offload update due to variety of reasons.
569-
All of the above mentioned cases must be considered as a bug, but even in that case we will report an error in the log but
570-
given that this script ensures the route is installed in the hardware it will automitigate such a bug.
571-
"""
572-
db = swsscommon.DBConnector('APPL_STATE_DB', 0)
573-
response_producer = swsscommon.NotificationProducer(db, f'{APPL_DB_NAME}_{swsscommon.APP_ROUTE_TABLE_NAME}_RESPONSE_CHANNEL')
574-
for entry in [entry for entry in missed_frr_rt if entry['prefix'] in rt_appl]:
575-
fvs = swsscommon.FieldValuePairs([('err_str', 'SWSS_RC_SUCCESS'), ('protocol', entry['protocol'])])
576-
response_producer.send('SWSS_RC_SUCCESS', entry['prefix'], fvs)
577-
578-
print_message(syslog.LOG_ERR, f'Mitigated route {entry["prefix"]}')
579-
580-
581496
def get_soc_ips(config_db):
582497
mux_table = config_db.get_table('MUX_CABLE')
583498
soc_ips = []
@@ -621,7 +536,7 @@ def check_routes():
621536
"""
622537
The heart of this script which runs the checks.
623538
Read APPL-DB & ASIC-DB, the relevant tables for route checking.
624-
Checkout routes in ASIC-DB to match APPL-DB, discounting local &
539+
Checkout routes in ASIC-DB to match APPL-DB, discounting local &
625540
default routes. In case of missed / unexpected entries in ASIC,
626541
it might be due to update latency between APPL & ASIC DBs. So collect
627542
ASIC-DB subscribe updates for a second, and checkout if you see SET
@@ -630,16 +545,12 @@ def check_routes():
630545
If there are still some unjustifiable diffs, between APPL & ASIC DB,
631546
related to routes report failure, else all good.
632547
633-
If there are FRR routes that aren't marked offloaded but all APPL & ASIC DB
634-
routes are in sync report failure and perform a mitigation action.
635-
636548
:return (0, None) on success, else (-1, results) where results holds
637549
the unjustifiable entries.
638550
"""
639551
intf_appl_miss = []
640552
rt_appl_miss = []
641553
rt_asic_miss = []
642-
rt_frr_miss = []
643554

644555
results = {}
645556
adds = []
@@ -688,22 +599,11 @@ def check_routes():
688599
if rt_asic_miss:
689600
results["Unaccounted_ROUTE_ENTRY_TABLE_entries"] = rt_asic_miss
690601

691-
rt_frr_miss = check_frr_pending_routes()
692-
693-
if rt_frr_miss:
694-
results["missed_FRR_routes"] = rt_frr_miss
695-
696602
if results:
697603
print_message(syslog.LOG_WARNING, "Failure results: {", json.dumps(results, indent=4), "}")
698604
print_message(syslog.LOG_WARNING, "Failed. Look at reported mismatches above")
699605
print_message(syslog.LOG_WARNING, "add: ", json.dumps(adds, indent=4))
700606
print_message(syslog.LOG_WARNING, "del: ", json.dumps(deletes, indent=4))
701-
702-
if rt_frr_miss and not rt_appl_miss and not rt_asic_miss:
703-
print_message(syslog.LOG_ERR, "Some routes are not set offloaded in FRR but all routes in APPL_DB and ASIC_DB are in sync")
704-
if is_suppress_fib_pending_enabled():
705-
mitigate_installed_not_offloaded_frr_routes(rt_frr_miss, rt_appl)
706-
707607
return -1, results
708608
else:
709609
print_message(syslog.LOG_INFO, "All good!")
@@ -749,7 +649,7 @@ def main():
749649
return ret, res
750650
else:
751651
return ret, res
752-
652+
753653

754654

755655
if __name__ == "__main__":

tests/mock_tables/config_db.json

+1-2
Original file line number | Diff line number | Diff line change
@@ -842,8 +842,7 @@
842842
"mac": "1d:34:db:16:a6:00",
843843
"platform": "x86_64-mlnx_msn3800-r0",
844844
"peer_switch": "sonic-switch",
845-
"type": "ToRRouter",
846-
"suppress-fib-pending": "enabled"
845+
"type": "ToRRouter"
847846
},
848847
"SNMP_COMMUNITY|msft": {
849848
"TYPE": "RO"

tests/route_check_test.py

+3-14
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,7 @@
77
import time
88
from sonic_py_common import device_info
99
from unittest.mock import MagicMock, patch
10-
from tests.route_check_test_data import APPL_DB, ARGS, ASIC_DB, CONFIG_DB, DEFAULT_CONFIG_DB, DESCR, OP_DEL, OP_SET, PRE, RESULT, RET, TEST_DATA, UPD, FRR_ROUTES
10+
from tests.route_check_test_data import APPL_DB, ARGS, ASIC_DB, CONFIG_DB, DEFAULT_CONFIG_DB, DESCR, OP_DEL, OP_SET, PRE, RESULT, RET, TEST_DATA, UPD
1111

1212
import pytest
1313

@@ -239,7 +239,6 @@ def setup(self):
239239

240240
def init(self):
241241
route_check.UNIT_TESTING = 1
242-
route_check.FRR_WAIT_TIME = 0
243242

244243
@pytest.fixture
245244
def force_hang(self):
@@ -259,8 +258,7 @@ def mock_dbs(self):
259258
patch("route_check.swsscommon.Table") as mock_table, \
260259
patch("route_check.swsscommon.Select") as mock_sel, \
261260
patch("route_check.swsscommon.SubscriberStateTable") as mock_subs, \
262-
patch("route_check.swsscommon.ConfigDBConnector", return_value=mock_config_db), \
263-
patch("route_check.swsscommon.NotificationProducer"):
261+
patch("route_check.swsscommon.ConfigDBConnector", return_value=mock_config_db):
264262
device_info.get_platform = MagicMock(return_value='unittest')
265263
set_mock(mock_table, mock_conn, mock_sel, mock_subs, mock_config_db)
266264
yield
@@ -274,16 +272,7 @@ def test_route_check(self, mock_dbs, test_num):
274272
set_test_case_data(ct_data)
275273
logger.info("Running test case {}: {}".format(test_num, ct_data[DESCR]))
276274

277-
with patch('sys.argv', ct_data[ARGS].split()), \
278-
patch('route_check.subprocess.check_output') as mock_check_output:
279-
280-
routes = ct_data.get(FRR_ROUTES, {})
281-
282-
def side_effect(*args, **kwargs):
283-
return json.dumps(routes)
284-
285-
mock_check_output.side_effect = side_effect
286-
275+
with patch('sys.argv', ct_data[ARGS].split()):
287276
ret, res = route_check.main()
288277
expect_ret = ct_data[RET] if RET in ct_data else 0
289278
expect_res = ct_data[RESULT] if RESULT in ct_data else None

0 commit comments

Comments
 (0)