11
11
How:
12
12
NOTE: The flow from APPL-DB to ASIC-DB takes a non-zero number of milliseconds.
13
13
1) Initiate subscribe for ASIC-DB updates.
14
- 2) Read APPL-DB & ASIC-DB
14
+ 2) Read APPL-DB & ASIC-DB
15
15
3) Get the diff.
16
- 4) If any diff,
16
+ 4) If any diff,
17
17
4.1) Collect subscribe messages for a second
18
- 4.2) check diff against the subscribe messages
18
+ 4.2) check diff against the subscribe messages
19
19
5) Rule out local interfaces & default routes
20
20
6) If still outstanding diffs, report failure.
21
21
29
29
down to ensure failure.
30
30
Analyze the reported failures to match expected.
31
31
You may use the exit code to verify the result as success or not.
32
-
32
+
33
33
34
34
35
35
"""
45
45
import time
46
46
import signal
47
47
import traceback
48
- import subprocess
49
48
50
- from ipaddress import ip_network
51
49
from swsscommon import swsscommon
52
50
from utilities_common import chassis
53
51
73
71
74
72
PRINT_MSG_LEN_MAX = 1000
75
73
76
- FRR_CHECK_RETRIES = 3
77
- FRR_WAIT_TIME = 15
78
-
79
74
class Level (Enum ):
80
75
ERR = 'ERR'
81
76
INFO = 'INFO'
@@ -146,7 +141,7 @@ def add_prefix(ip):
146
141
ip = ip + PREFIX_SEPARATOR + "32"
147
142
else :
148
143
ip = ip + PREFIX_SEPARATOR + "128"
149
- return str ( ip_network ( ip ))
144
+ return ip
150
145
151
146
152
147
def add_prefix_ifnot (ip ):
@@ -155,7 +150,7 @@ def add_prefix_ifnot(ip):
155
150
:param ip: IP to add prefix as string.
156
151
:return ip with prefix
157
152
"""
158
- return str ( ip_network ( ip )) if ip .find (PREFIX_SEPARATOR ) != - 1 else add_prefix (ip )
153
+ return ip if ip .find (PREFIX_SEPARATOR ) != - 1 else add_prefix (ip )
159
154
160
155
161
156
def is_local (ip ):
@@ -298,7 +293,7 @@ def get_routes():
298
293
299
294
def get_route_entries ():
300
295
"""
301
- helper to read present route entries from ASIC-DB and
296
+ helper to read present route entries from ASIC-DB and
302
297
as well as initiate a selector for ASIC-DB:ASIC-state updates.
303
298
:return (selector, subscriber, <list of sorted routes>)
304
299
"""
@@ -314,39 +309,14 @@ def get_route_entries():
314
309
res , e = checkout_rt_entry (k )
315
310
if res :
316
311
rt .append (e )
317
-
312
+
318
313
print_message (syslog .LOG_DEBUG , json .dumps ({"ASIC_ROUTE_ENTRY" : sorted (rt )}, indent = 4 ))
319
314
320
315
selector = swsscommon .Select ()
321
316
selector .addSelectable (subs )
322
317
return (selector , subs , sorted (rt ))
323
318
324
319
325
- def is_suppress_fib_pending_enabled ():
326
- """
327
- Returns True if FIB suppression is enabled, False otherwise
328
- """
329
- cfg_db = swsscommon .ConfigDBConnector ()
330
- cfg_db .connect ()
331
-
332
- state = cfg_db .get_entry ('DEVICE_METADATA' , 'localhost' ).get ('suppress-fib-pending' )
333
-
334
- return state == 'enabled'
335
-
336
-
337
- def get_frr_routes ():
338
- """
339
- Read routes from zebra through CLI command
340
- :return frr routes dictionary
341
- """
342
-
343
- output = subprocess .check_output ('show ip route json' , shell = True )
344
- routes = json .loads (output )
345
- output = subprocess .check_output ('show ipv6 route json' , shell = True )
346
- routes .update (json .loads (output ))
347
- return routes
348
-
349
-
350
320
def get_interfaces ():
351
321
"""
352
322
helper to read interface table from APPL-DB.
@@ -384,7 +354,7 @@ def filter_out_local_interfaces(keys):
384
354
385
355
chassis_local_intfs = chassis .get_chassis_local_interfaces ()
386
356
local_if_lst .update (set (chassis_local_intfs ))
387
-
357
+
388
358
db = swsscommon .DBConnector (APPL_DB_NAME , 0 )
389
359
tbl = swsscommon .Table (db , 'ROUTE_TABLE' )
390
360
@@ -523,61 +493,6 @@ def filter_out_standalone_tunnel_routes(routes):
523
493
return updated_routes
524
494
525
495
526
- def check_frr_pending_routes ():
527
- """
528
- Check FRR routes for offload flag presence by executing "show ip route json"
529
- Returns a list of routes that have no offload flag.
530
- """
531
-
532
- missed_rt = []
533
-
534
- retries = FRR_CHECK_RETRIES
535
- for i in range (retries ):
536
- missed_rt = []
537
- frr_routes = get_frr_routes ()
538
-
539
- for _ , entries in frr_routes .items ():
540
- for entry in entries :
541
- if entry ['protocol' ] != 'bgp' :
542
- continue
543
-
544
- # TODO: Also handle VRF routes. Currently this script does not check for VRF routes so it would be incorrect for us
545
- # to assume they are installed in ASIC_DB, so we don't handle them.
546
- if entry ['vrfName' ] != 'default' :
547
- continue
548
-
549
- if not entry .get ('offloaded' , False ):
550
- missed_rt .append (entry )
551
-
552
- if not missed_rt :
553
- break
554
-
555
- time .sleep (FRR_WAIT_TIME )
556
-
557
- return missed_rt
558
-
559
-
560
- def mitigate_installed_not_offloaded_frr_routes (missed_frr_rt , rt_appl ):
561
- """
562
- Mitigate installed but not offloaded FRR routes.
563
-
564
- In case route exists in APPL_DB, this function will manually send a notification to fpmsyncd
565
- to trigger the flow that sends offload flag to zebra.
566
-
567
- It is designed to mitigate a problem when orchagent fails to send notification about installed route to fpmsyncd
568
- or fpmsyncd not being able to read the notification or in case zebra fails to receive offload update due to a variety of reasons.
569
- All of the above mentioned cases must be considered as a bug, but even in that case we will report an error in the log but
570
- given that this script ensures the route is installed in the hardware, it will auto-mitigate such a bug.
571
- """
572
- db = swsscommon .DBConnector ('APPL_STATE_DB' , 0 )
573
- response_producer = swsscommon .NotificationProducer (db , f'{ APPL_DB_NAME } _{ swsscommon .APP_ROUTE_TABLE_NAME } _RESPONSE_CHANNEL' )
574
- for entry in [entry for entry in missed_frr_rt if entry ['prefix' ] in rt_appl ]:
575
- fvs = swsscommon .FieldValuePairs ([('err_str' , 'SWSS_RC_SUCCESS' ), ('protocol' , entry ['protocol' ])])
576
- response_producer .send ('SWSS_RC_SUCCESS' , entry ['prefix' ], fvs )
577
-
578
- print_message (syslog .LOG_ERR , f'Mitigated route { entry ["prefix" ]} ' )
579
-
580
-
581
496
def get_soc_ips (config_db ):
582
497
mux_table = config_db .get_table ('MUX_CABLE' )
583
498
soc_ips = []
@@ -621,7 +536,7 @@ def check_routes():
621
536
"""
622
537
The heart of this script which runs the checks.
623
538
Read APPL-DB & ASIC-DB, the relevant tables for route checking.
624
- Checkout routes in ASIC-DB to match APPL-DB, discounting local &
539
+ Checkout routes in ASIC-DB to match APPL-DB, discounting local &
625
540
default routes. In case of missed / unexpected entries in ASIC,
626
541
it might be due to update latency between APPL & ASIC DBs. So collect
627
542
ASIC-DB subscribe updates for a second, and checkout if you see SET
@@ -630,16 +545,12 @@ def check_routes():
630
545
If there are still some unjustifiable diffs, between APPL & ASIC DB,
631
546
related to routes report failure, else all good.
632
547
633
- If there are FRR routes that aren't marked offloaded but all APPL & ASIC DB
634
- routes are in sync report failure and perform a mitigation action.
635
-
636
548
:return (0, None) on success, else (-1, results) where results holds
637
549
the unjustifiable entries.
638
550
"""
639
551
intf_appl_miss = []
640
552
rt_appl_miss = []
641
553
rt_asic_miss = []
642
- rt_frr_miss = []
643
554
644
555
results = {}
645
556
adds = []
@@ -688,22 +599,11 @@ def check_routes():
688
599
if rt_asic_miss :
689
600
results ["Unaccounted_ROUTE_ENTRY_TABLE_entries" ] = rt_asic_miss
690
601
691
- rt_frr_miss = check_frr_pending_routes ()
692
-
693
- if rt_frr_miss :
694
- results ["missed_FRR_routes" ] = rt_frr_miss
695
-
696
602
if results :
697
603
print_message (syslog .LOG_WARNING , "Failure results: {" , json .dumps (results , indent = 4 ), "}" )
698
604
print_message (syslog .LOG_WARNING , "Failed. Look at reported mismatches above" )
699
605
print_message (syslog .LOG_WARNING , "add: " , json .dumps (adds , indent = 4 ))
700
606
print_message (syslog .LOG_WARNING , "del: " , json .dumps (deletes , indent = 4 ))
701
-
702
- if rt_frr_miss and not rt_appl_miss and not rt_asic_miss :
703
- print_message (syslog .LOG_ERR , "Some routes are not set offloaded in FRR but all routes in APPL_DB and ASIC_DB are in sync" )
704
- if is_suppress_fib_pending_enabled ():
705
- mitigate_installed_not_offloaded_frr_routes (rt_frr_miss , rt_appl )
706
-
707
607
return - 1 , results
708
608
else :
709
609
print_message (syslog .LOG_INFO , "All good!" )
@@ -749,7 +649,7 @@ def main():
749
649
return ret , res
750
650
else :
751
651
return ret , res
752
-
652
+
753
653
754
654
755
655
if __name__ == "__main__" :
0 commit comments