Skip to content

Commit 4f7ed16

Browse files
Speed up route_check.py (#3604)
1 parent 35569c8 commit 4f7ed16

File tree

1 file changed

+121
-91
lines changed

1 file changed

+121
-91
lines changed

scripts/route_check.py

+121-91
Original file line number | Diff line number | Diff line change
@@ -46,6 +46,7 @@
4646
import signal
4747
import traceback
4848
import subprocess
49+
import concurrent.futures
4950

5051
from ipaddress import ip_network
5152
from swsscommon import swsscommon
@@ -348,10 +349,18 @@ def is_suppress_fib_pending_enabled(namespace):
348349
return state == 'enabled'
349350

350351

351-
def get_frr_routes(namespace):
352+
def fetch_routes(cmd):
352353
"""
353-
Read routes from zebra through CLI command
354-
:return frr routes dictionary
354+
Fetch routes using the given command.
355+
"""
356+
output = subprocess.check_output(cmd, text=True)
357+
return json.loads(output)
358+
359+
360+
def get_frr_routes_parallel(namespace):
361+
"""
362+
Read routes from zebra through CLI command for IPv4 and IPv6 in parallel
363+
:return combined IPv4 and IPv6 routes dictionary.
355364
"""
356365
if namespace == multi_asic.DEFAULT_NAMESPACE:
357366
v4_route_cmd = ['show', 'ip', 'route', 'json']
@@ -360,12 +369,18 @@ def get_frr_routes(namespace):
360369
v4_route_cmd = ['show', 'ip', 'route', '-n', namespace, 'json']
361370
v6_route_cmd = ['show', 'ipv6', 'route', '-n', namespace, 'json']
362371

363-
output = subprocess.check_output(v4_route_cmd, text=True)
364-
routes = json.loads(output)
365-
output = subprocess.check_output(v6_route_cmd, text=True)
366-
routes.update(json.loads(output))
367-
print_message(syslog.LOG_DEBUG, "FRR Routes: namespace={}, routes={}".format(namespace, routes))
368-
return routes
372+
with concurrent.futures.ThreadPoolExecutor() as executor:
373+
future_v4 = executor.submit(fetch_routes, v4_route_cmd)
374+
future_v6 = executor.submit(fetch_routes, v6_route_cmd)
375+
376+
# Wait for both results to complete
377+
v4_routes = future_v4.result()
378+
v6_routes = future_v6.result()
379+
380+
# Combine both IPv4 and IPv6 routes
381+
v4_routes.update(v6_routes)
382+
print_message(syslog.LOG_DEBUG, "FRR Routes: namespace={}, routes={}".format(namespace, v4_routes))
383+
return v4_routes
369384

370385

371386
def get_interfaces(namespace):
@@ -566,7 +581,7 @@ def check_frr_pending_routes(namespace):
566581
retries = FRR_CHECK_RETRIES
567582
for i in range(retries):
568583
missed_rt = []
569-
frr_routes = get_frr_routes(namespace)
584+
frr_routes = get_frr_routes_parallel(namespace)
570585

571586
for _, entries in frr_routes.items():
572587
for entry in entries:
@@ -699,8 +714,9 @@ def _filter_out_neigh_route(routes, neighs):
699714
return rt_appl_miss, rt_asic_miss
700715

701716

702-
def check_routes(namespace):
717+
def check_routes_for_namespace(namespace):
703718
"""
719+
Process a Single Namespace:
704720
The heart of this script which runs the checks.
705721
Read APPL-DB & ASIC-DB, the relevant tables for route checking.
706722
Checkout routes in ASIC-DB to match APPL-DB, discounting local &
@@ -718,6 +734,83 @@ def check_routes(namespace):
718734
:return (0, None) on success, else (-1, results) where results holds
719735
the unjustifiable entries.
720736
"""
737+
738+
results = {}
739+
adds = []
740+
deletes = []
741+
intf_appl_miss = []
742+
rt_appl_miss = []
743+
rt_asic_miss = []
744+
rt_frr_miss = []
745+
746+
selector, subs, rt_asic = get_asicdb_routes(namespace)
747+
748+
rt_appl = get_appdb_routes(namespace)
749+
intf_appl = get_interfaces(namespace)
750+
751+
# Diff APPL-DB routes & ASIC-DB routes
752+
rt_appl_miss, rt_asic_miss = diff_sorted_lists(rt_appl, rt_asic)
753+
754+
# Check missed ASIC routes against APPL-DB INTF_TABLE
755+
_, rt_asic_miss = diff_sorted_lists(intf_appl, rt_asic_miss)
756+
rt_asic_miss = filter_out_default_routes(rt_asic_miss)
757+
rt_asic_miss = filter_out_vnet_routes(namespace, rt_asic_miss)
758+
rt_asic_miss = filter_out_standalone_tunnel_routes(namespace, rt_asic_miss)
759+
rt_asic_miss = filter_out_soc_ip_routes(namespace, rt_asic_miss)
760+
761+
# Check APPL-DB INTF_TABLE with ASIC table route entries
762+
intf_appl_miss, _ = diff_sorted_lists(intf_appl, rt_asic)
763+
764+
if rt_appl_miss:
765+
rt_appl_miss = filter_out_local_interfaces(namespace, rt_appl_miss)
766+
767+
if rt_appl_miss:
768+
rt_appl_miss = filter_out_voq_neigh_routes(namespace, rt_appl_miss)
769+
770+
# NOTE: On dualtor environment, ignore any route miss for the
771+
# neighbors learned from the vlan subnet.
772+
if rt_appl_miss or rt_asic_miss:
773+
rt_appl_miss, rt_asic_miss = filter_out_vlan_neigh_route_miss(namespace, rt_appl_miss, rt_asic_miss)
774+
775+
if rt_appl_miss or rt_asic_miss:
776+
# Look for subscribe updates for a second
777+
adds, deletes = get_subscribe_updates(selector, subs)
778+
779+
# Drop all those for which SET received
780+
rt_appl_miss, _ = diff_sorted_lists(rt_appl_miss, adds)
781+
782+
# Drop all those for which DEL received
783+
rt_asic_miss, _ = diff_sorted_lists(rt_asic_miss, deletes)
784+
785+
if rt_appl_miss:
786+
results["missed_ROUTE_TABLE_routes"] = rt_appl_miss
787+
788+
if intf_appl_miss:
789+
results["missed_INTF_TABLE_entries"] = intf_appl_miss
790+
791+
if rt_asic_miss:
792+
results["Unaccounted_ROUTE_ENTRY_TABLE_entries"] = rt_asic_miss
793+
794+
if is_bgp_suppress_fib_pending_enabled(namespace):
795+
rt_frr_miss = check_frr_pending_routes(namespace)
796+
797+
if rt_frr_miss:
798+
results["missed_FRR_routes"] = rt_frr_miss
799+
800+
if results:
801+
if rt_frr_miss and not rt_appl_miss and not rt_asic_miss:
802+
print_message(syslog.LOG_ERR, "Some routes are not set offloaded in FRR{} \
803+
but all routes in APPL_DB and ASIC_DB are in sync".format(namespace))
804+
if is_suppress_fib_pending_enabled(namespace):
805+
mitigate_installed_not_offloaded_frr_routes(namespace, rt_frr_miss, rt_appl)
806+
807+
return results, adds, deletes
808+
809+
810+
def check_routes(namespace):
811+
"""
812+
Main function to parallelize route checks across all namespaces.
813+
"""
721814
namespace_list = []
722815
if namespace is not multi_asic.DEFAULT_NAMESPACE and namespace in multi_asic.get_namespace_list():
723816
namespace_list.append(namespace)
@@ -726,85 +819,23 @@ def check_routes(namespace):
726819
print_message(syslog.LOG_INFO, "Checking routes for namespaces: ", namespace_list)
727820

728821
results = {}
729-
adds = {}
730-
deletes = {}
731-
for namespace in namespace_list:
732-
intf_appl_miss = []
733-
rt_appl_miss = []
734-
rt_asic_miss = []
735-
rt_frr_miss = []
736-
adds[namespace] = []
737-
deletes[namespace] = []
738-
739-
selector, subs, rt_asic = get_asicdb_routes(namespace)
740-
741-
rt_appl = get_appdb_routes(namespace)
742-
intf_appl = get_interfaces(namespace)
743-
744-
# Diff APPL-DB routes & ASIC-DB routes
745-
rt_appl_miss, rt_asic_miss = diff_sorted_lists(rt_appl, rt_asic)
746-
747-
# Check missed ASIC routes against APPL-DB INTF_TABLE
748-
_, rt_asic_miss = diff_sorted_lists(intf_appl, rt_asic_miss)
749-
rt_asic_miss = filter_out_default_routes(rt_asic_miss)
750-
rt_asic_miss = filter_out_vnet_routes(namespace, rt_asic_miss)
751-
rt_asic_miss = filter_out_standalone_tunnel_routes(namespace, rt_asic_miss)
752-
rt_asic_miss = filter_out_soc_ip_routes(namespace, rt_asic_miss)
753-
754-
755-
# Check APPL-DB INTF_TABLE with ASIC table route entries
756-
intf_appl_miss, _ = diff_sorted_lists(intf_appl, rt_asic)
757-
758-
if rt_appl_miss:
759-
rt_appl_miss = filter_out_local_interfaces(namespace, rt_appl_miss)
760-
761-
if rt_appl_miss:
762-
rt_appl_miss = filter_out_voq_neigh_routes(namespace, rt_appl_miss)
763-
764-
# NOTE: On dualtor environment, ignore any route miss for the
765-
# neighbors learned from the vlan subnet.
766-
if rt_appl_miss or rt_asic_miss:
767-
rt_appl_miss, rt_asic_miss = filter_out_vlan_neigh_route_miss(namespace, rt_appl_miss, rt_asic_miss)
768-
769-
if rt_appl_miss or rt_asic_miss:
770-
# Look for subscribe updates for a second
771-
adds[namespace], deletes[namespace] = get_subscribe_updates(selector, subs)
772-
773-
# Drop all those for which SET received
774-
rt_appl_miss, _ = diff_sorted_lists(rt_appl_miss, adds[namespace])
775-
776-
# Drop all those for which DEL received
777-
rt_asic_miss, _ = diff_sorted_lists(rt_asic_miss, deletes[namespace])
778-
779-
if rt_appl_miss:
780-
if namespace not in results:
781-
results[namespace] = {}
782-
results[namespace]["missed_ROUTE_TABLE_routes"] = rt_appl_miss
783-
784-
if intf_appl_miss:
785-
if namespace not in results:
786-
results[namespace] = {}
787-
results[namespace]["missed_INTF_TABLE_entries"] = intf_appl_miss
788-
789-
if rt_asic_miss:
790-
if namespace not in results:
791-
results[namespace] = {}
792-
results[namespace]["Unaccounted_ROUTE_ENTRY_TABLE_entries"] = rt_asic_miss
793-
794-
if is_bgp_suppress_fib_pending_enabled(namespace):
795-
rt_frr_miss = check_frr_pending_routes(namespace)
796-
797-
if rt_frr_miss:
798-
if namespace not in results:
799-
results[namespace] = {}
800-
results[namespace]["missed_FRR_routes"] = rt_frr_miss
801-
802-
if results:
803-
if rt_frr_miss and not rt_appl_miss and not rt_asic_miss:
804-
print_message(syslog.LOG_ERR, "Some routes are not set offloaded in FRR{} but all "
805-
"routes in APPL_DB and ASIC_DB are in sync".format(namespace))
806-
if is_suppress_fib_pending_enabled(namespace):
807-
mitigate_installed_not_offloaded_frr_routes(namespace, rt_frr_miss, rt_appl)
822+
all_adds = {}
823+
all_deletes = {}
824+
825+
# Use ThreadPoolExecutor to parallelize the check for each namespace
826+
with concurrent.futures.ThreadPoolExecutor() as executor:
827+
futures = {executor.submit(check_routes_for_namespace, ns): ns for ns in namespace_list}
828+
829+
for future in concurrent.futures.as_completed(futures):
830+
ns = futures[future]
831+
try:
832+
result, adds, deletes = future.result()
833+
if result:
834+
results[ns] = result
835+
all_adds[ns] = adds
836+
all_deletes[ns] = deletes
837+
except Exception as e:
838+
print_message(syslog.LOG_ERR, "Error processing namespace {}: {}".format(ns, e))
808839

809840
if results:
810841
print_message(syslog.LOG_WARNING, "Failure results: {", json.dumps(results, indent=4), "}")
@@ -873,6 +904,5 @@ def main():
873904
return ret, res
874905

875906

876-
877907
if __name__ == "__main__":
878908
sys.exit(main()[0])

0 commit comments

Comments
 (0)