Skip to content

Commit 7cbcfda

Browse files
Speed up route_check script (#3544)
This PR fixes #18773. How I did it: Execute route_check on each ASIC in parallel, and fetch the IPv4 and IPv6 routes in parallel. How to verify it: Execute "time route_check.py" on a T2 chassis with 32k IPv4 + 32k IPv6 routes. Results: Before: Checking routes for namespaces: ['asic0', 'asic1'] real 3m16.387s user 1m26.084s sys 0m7.275s. After: time route_check.py real 1m30.675s user 1m33.777s sys 0m8.209s.
1 parent 329fc22 commit 7cbcfda

File tree

1 file changed

+110
-80
lines changed

1 file changed

+110
-80
lines changed

scripts/route_check.py

+110-80
Original file line number | Diff line number | Diff line change
@@ -46,6 +46,7 @@
4646
import signal
4747
import traceback
4848
import subprocess
49+
import concurrent.futures
4950

5051
from ipaddress import ip_network
5152
from swsscommon import swsscommon
@@ -338,10 +339,18 @@ def is_suppress_fib_pending_enabled(namespace):
338339
return state == 'enabled'
339340

340341

341-
def get_frr_routes(namespace):
342+
def fetch_routes(cmd):
342343
"""
343-
Read routes from zebra through CLI command
344-
:return frr routes dictionary
344+
Fetch routes using the given command.
345+
"""
346+
output = subprocess.check_output(cmd, text=True)
347+
return json.loads(output)
348+
349+
350+
def get_frr_routes_parallel(namespace):
351+
"""
352+
Read routes from zebra through CLI command for IPv4 and IPv6 in parallel
353+
:return combined IPv4 and IPv6 routes dictionary.
345354
"""
346355
if namespace == multi_asic.DEFAULT_NAMESPACE:
347356
v4_route_cmd = ['show', 'ip', 'route', 'json']
@@ -350,12 +359,18 @@ def get_frr_routes(namespace):
350359
v4_route_cmd = ['show', 'ip', 'route', '-n', namespace, 'json']
351360
v6_route_cmd = ['show', 'ipv6', 'route', '-n', namespace, 'json']
352361

353-
output = subprocess.check_output(v4_route_cmd, text=True)
354-
routes = json.loads(output)
355-
output = subprocess.check_output(v6_route_cmd, text=True)
356-
routes.update(json.loads(output))
357-
print_message(syslog.LOG_DEBUG, "FRR Routes: namespace={}, routes={}".format(namespace, routes))
358-
return routes
362+
with concurrent.futures.ThreadPoolExecutor() as executor:
363+
future_v4 = executor.submit(fetch_routes, v4_route_cmd)
364+
future_v6 = executor.submit(fetch_routes, v6_route_cmd)
365+
366+
# Wait for both results to complete
367+
v4_routes = future_v4.result()
368+
v6_routes = future_v6.result()
369+
370+
# Combine both IPv4 and IPv6 routes
371+
v4_routes.update(v6_routes)
372+
print_message(syslog.LOG_DEBUG, "FRR Routes: namespace={}, routes={}".format(namespace, v4_routes))
373+
return v4_routes
359374

360375

361376
def get_interfaces(namespace):
@@ -556,7 +571,7 @@ def check_frr_pending_routes(namespace):
556571
retries = FRR_CHECK_RETRIES
557572
for i in range(retries):
558573
missed_rt = []
559-
frr_routes = get_frr_routes(namespace)
574+
frr_routes = get_frr_routes_parallel(namespace)
560575

561576
for _, entries in frr_routes.items():
562577
for entry in entries:
@@ -689,8 +704,9 @@ def _filter_out_neigh_route(routes, neighs):
689704
return rt_appl_miss, rt_asic_miss
690705

691706

692-
def check_routes(namespace):
707+
def check_routes_for_namespace(namespace):
693708
"""
709+
Process a Single Namespace:
694710
The heart of this script which runs the checks.
695711
Read APPL-DB & ASIC-DB, the relevant tables for route checking.
696712
Checkout routes in ASIC-DB to match APPL-DB, discounting local &
@@ -708,98 +724,113 @@ def check_routes(namespace):
708724
:return (0, None) on success, else (-1, results) where results holds
709725
the unjustifiable entries.
710726
"""
711-
namespace_list = []
712-
if namespace is not multi_asic.DEFAULT_NAMESPACE and namespace in multi_asic.get_namespace_list():
713-
namespace_list.append(namespace)
714-
else:
715-
namespace_list = multi_asic.get_namespace_list()
716-
print_message(syslog.LOG_INFO, "Checking routes for namespaces: ", namespace_list)
717727

718728
results = {}
719-
adds = {}
720-
deletes = {}
721-
for namespace in namespace_list:
722-
intf_appl_miss = []
723-
rt_appl_miss = []
724-
rt_asic_miss = []
725-
rt_frr_miss = []
726-
adds[namespace] = []
727-
deletes[namespace] = []
729+
adds = []
730+
deletes = []
731+
intf_appl_miss = []
732+
rt_appl_miss = []
733+
rt_asic_miss = []
734+
rt_frr_miss = []
728735

729-
selector, subs, rt_asic = get_asicdb_routes(namespace)
736+
selector, subs, rt_asic = get_asicdb_routes(namespace)
730737

731-
rt_appl = get_appdb_routes(namespace)
732-
intf_appl = get_interfaces(namespace)
738+
rt_appl = get_appdb_routes(namespace)
739+
intf_appl = get_interfaces(namespace)
733740

734-
# Diff APPL-DB routes & ASIC-DB routes
735-
rt_appl_miss, rt_asic_miss = diff_sorted_lists(rt_appl, rt_asic)
741+
# Diff APPL-DB routes & ASIC-DB routes
742+
rt_appl_miss, rt_asic_miss = diff_sorted_lists(rt_appl, rt_asic)
736743

737-
# Check missed ASIC routes against APPL-DB INTF_TABLE
738-
_, rt_asic_miss = diff_sorted_lists(intf_appl, rt_asic_miss)
739-
rt_asic_miss = filter_out_default_routes(rt_asic_miss)
740-
rt_asic_miss = filter_out_vnet_routes(namespace, rt_asic_miss)
741-
rt_asic_miss = filter_out_standalone_tunnel_routes(namespace, rt_asic_miss)
742-
rt_asic_miss = filter_out_soc_ip_routes(namespace, rt_asic_miss)
744+
# Check missed ASIC routes against APPL-DB INTF_TABLE
745+
_, rt_asic_miss = diff_sorted_lists(intf_appl, rt_asic_miss)
746+
rt_asic_miss = filter_out_default_routes(rt_asic_miss)
747+
rt_asic_miss = filter_out_vnet_routes(namespace, rt_asic_miss)
748+
rt_asic_miss = filter_out_standalone_tunnel_routes(namespace, rt_asic_miss)
749+
rt_asic_miss = filter_out_soc_ip_routes(namespace, rt_asic_miss)
743750

751+
# Check APPL-DB INTF_TABLE with ASIC table route entries
752+
intf_appl_miss, _ = diff_sorted_lists(intf_appl, rt_asic)
744753

745-
# Check APPL-DB INTF_TABLE with ASIC table route entries
746-
intf_appl_miss, _ = diff_sorted_lists(intf_appl, rt_asic)
754+
if rt_appl_miss:
755+
rt_appl_miss = filter_out_local_interfaces(namespace, rt_appl_miss)
747756

748-
if rt_appl_miss:
749-
rt_appl_miss = filter_out_local_interfaces(namespace, rt_appl_miss)
757+
if rt_appl_miss:
758+
rt_appl_miss = filter_out_voq_neigh_routes(namespace, rt_appl_miss)
750759

751-
if rt_appl_miss:
752-
rt_appl_miss = filter_out_voq_neigh_routes(namespace, rt_appl_miss)
760+
# NOTE: On dualtor environment, ignore any route miss for the
761+
# neighbors learned from the vlan subnet.
762+
if rt_appl_miss or rt_asic_miss:
763+
rt_appl_miss, rt_asic_miss = filter_out_vlan_neigh_route_miss(namespace, rt_appl_miss, rt_asic_miss)
753764

754-
# NOTE: On dualtor environment, ignore any route miss for the
755-
# neighbors learned from the vlan subnet.
756-
if rt_appl_miss or rt_asic_miss:
757-
rt_appl_miss, rt_asic_miss = filter_out_vlan_neigh_route_miss(namespace, rt_appl_miss, rt_asic_miss)
765+
if rt_appl_miss or rt_asic_miss:
766+
# Look for subscribe updates for a second
767+
adds, deletes = get_subscribe_updates(selector, subs)
758768

759-
if rt_appl_miss or rt_asic_miss:
760-
# Look for subscribe updates for a second
761-
adds[namespace], deletes[namespace] = get_subscribe_updates(selector, subs)
769+
# Drop all those for which SET received
770+
rt_appl_miss, _ = diff_sorted_lists(rt_appl_miss, adds)
762771

763-
# Drop all those for which SET received
764-
rt_appl_miss, _ = diff_sorted_lists(rt_appl_miss, adds[namespace])
772+
# Drop all those for which DEL received
773+
rt_asic_miss, _ = diff_sorted_lists(rt_asic_miss, deletes)
765774

766-
# Drop all those for which DEL received
767-
rt_asic_miss, _ = diff_sorted_lists(rt_asic_miss, deletes[namespace])
775+
if rt_appl_miss:
776+
results["missed_ROUTE_TABLE_routes"] = rt_appl_miss
768777

769-
if rt_appl_miss:
770-
if namespace not in results:
771-
results[namespace] = {}
772-
results[namespace]["missed_ROUTE_TABLE_routes"] = rt_appl_miss
778+
if intf_appl_miss:
779+
results["missed_INTF_TABLE_entries"] = intf_appl_miss
773780

774-
if intf_appl_miss:
775-
if namespace not in results:
776-
results[namespace] = {}
777-
results[namespace]["missed_INTF_TABLE_entries"] = intf_appl_miss
781+
if rt_asic_miss:
782+
results["Unaccounted_ROUTE_ENTRY_TABLE_entries"] = rt_asic_miss
778783

779-
if rt_asic_miss:
780-
if namespace not in results:
781-
results[namespace] = {}
782-
results[namespace]["Unaccounted_ROUTE_ENTRY_TABLE_entries"] = rt_asic_miss
784+
rt_frr_miss = check_frr_pending_routes(namespace)
783785

784-
rt_frr_miss = check_frr_pending_routes(namespace)
786+
if rt_frr_miss:
787+
results["missed_FRR_routes"] = rt_frr_miss
785788

786-
if rt_frr_miss:
787-
if namespace not in results:
788-
results[namespace] = {}
789-
results[namespace]["missed_FRR_routes"] = rt_frr_miss
789+
if results:
790+
if rt_frr_miss and not rt_appl_miss and not rt_asic_miss:
791+
print_message(syslog.LOG_ERR, "Some routes are not set offloaded in FRR{} \
792+
but all routes in APPL_DB and ASIC_DB are in sync".format(namespace))
793+
if is_suppress_fib_pending_enabled(namespace):
794+
mitigate_installed_not_offloaded_frr_routes(namespace, rt_frr_miss, rt_appl)
795+
796+
return results, adds, deletes
790797

791-
if results:
792-
if rt_frr_miss and not rt_appl_miss and not rt_asic_miss:
793-
print_message(syslog.LOG_ERR, "Some routes are not set offloaded in FRR{} \
794-
but all routes in APPL_DB and ASIC_DB are in sync".format(namespace))
795-
if is_suppress_fib_pending_enabled(namespace):
796-
mitigate_installed_not_offloaded_frr_routes(namespace, rt_frr_miss, rt_appl)
798+
799+
def check_routes(namespace):
800+
"""
801+
Main function to parallelize route checks across all namespaces.
802+
"""
803+
namespace_list = []
804+
if namespace is not multi_asic.DEFAULT_NAMESPACE and namespace in multi_asic.get_namespace_list():
805+
namespace_list.append(namespace)
806+
else:
807+
namespace_list = multi_asic.get_namespace_list()
808+
print_message(syslog.LOG_INFO, "Checking routes for namespaces: ", namespace_list)
809+
810+
results = {}
811+
all_adds = {}
812+
all_deletes = {}
813+
814+
# Use ThreadPoolExecutor to parallelize the check for each namespace
815+
with concurrent.futures.ThreadPoolExecutor() as executor:
816+
futures = {executor.submit(check_routes_for_namespace, ns): ns for ns in namespace_list}
817+
818+
for future in concurrent.futures.as_completed(futures):
819+
ns = futures[future]
820+
try:
821+
result, adds, deletes = future.result()
822+
if result:
823+
results[ns] = result
824+
all_adds[ns] = adds
825+
all_deletes[ns] = deletes
826+
except Exception as e:
827+
print_message(syslog.LOG_ERR, "Error processing namespace {}: {}".format(ns, e))
797828

798829
if results:
799830
print_message(syslog.LOG_WARNING, "Failure results: {", json.dumps(results, indent=4), "}")
800831
print_message(syslog.LOG_WARNING, "Failed. Look at reported mismatches above")
801-
print_message(syslog.LOG_WARNING, "add: ", json.dumps(adds, indent=4))
802-
print_message(syslog.LOG_WARNING, "del: ", json.dumps(deletes, indent=4))
832+
print_message(syslog.LOG_WARNING, "add: ", json.dumps(all_adds, indent=4))
833+
print_message(syslog.LOG_WARNING, "del: ", json.dumps(all_deletes, indent=4))
803834
return -1, results
804835
else:
805836
print_message(syslog.LOG_INFO, "All good!")
@@ -862,6 +893,5 @@ def main():
862893
return ret, res
863894

864895

865-
866896
if __name__ == "__main__":
867897
sys.exit(main()[0])

0 commit comments

Comments
 (0)