Skip to content

Commit 32d18d2

Browse files
authored
[Reclaiming buffer][202012] Database migrator for reclaiming buffer (#1898)
This cherry-picks community PR #1822 to 202012. - What I did: The DB migrator now supports reclaiming reserved buffer for unused ports. As there is no empty slot for a database version between 202012 and master (202111), this migration is done regardless of whether the database version has changed. The DB migrator should be idempotent. - How I did it: For admin-down ports, if the buffer object configuration aligns with the default configuration, set the buffer object configuration as follows: 1. Dynamic model: all normal buffer objects are configured on admin-down ports. The buffer manager will apply zero profiles on admin-down ports. 2. Static model: zero buffer objects are configured on admin-down ports. - How to verify it: Unit test and manually. Signed-off-by: Stephen Sun <[email protected]>
1 parent 44ec4c8 commit 32d18d2

File tree

166 files changed

+20445
-3401
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

166 files changed

+20445
-3401
lines changed

scripts/db_migrator.py

+13-3
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ def __init__(self, namespace, socket=None):
7575

7676
if asic_type == "mellanox":
7777
from mellanox_buffer_migrator import MellanoxBufferMigrator
78-
self.mellanox_buffer_migrator = MellanoxBufferMigrator(self.configDB)
78+
self.mellanox_buffer_migrator = MellanoxBufferMigrator(self.configDB, self.appDB, self.stateDB)
7979

8080
def migrate_pfc_wd_table(self):
8181
'''
@@ -365,9 +365,9 @@ def prepare_dynamic_buffer_for_warm_reboot(self, buffer_pools=None, buffer_profi
365365
self.appDB.set(self.appDB.APPL_DB, appl_db_key, field, data)
366366

367367
if keys_copied:
368-
log.log_info("The following items in table {} in CONFIG_DB have been copied to APPL_DB: {}".format(table_name, keys_copied))
368+
log.log_notice("The following items in table {} in CONFIG_DB have been copied to APPL_DB: {}".format(table_name, keys_copied))
369369
if keys_ignored:
370-
log.log_info("The following items in table {} in CONFIG_DB have been ignored: {}".format(table_name, keys_copied))
370+
log.log_notice("The following items in table {} in CONFIG_DB have been ignored: {}".format(table_name, keys_copied))
371371

372372
return True
373373

@@ -547,6 +547,16 @@ def common_migration_ops(self):
547547

548548
self.migrate_copp_table()
549549

550+
# To migrate buffer on Mellanox platforms
551+
# For legacy branches, this is the only place it can be called because
552+
# - Putting it in version 1_0_6 causes db_migrator to be unable to run it
553+
# when the switch is migrated from 202012-no-reclaiming-buffer whose db version is 2_0_0
554+
# to 202012-reclaiming-buffer, so that reclaiming buffer does not take effect
555+
# - Putting it in version 2_0_0 is not a solution either because
556+
# version 2_0_1 has been occupied by 202106
557+
if self.asic_type == "mellanox":
558+
self.mellanox_buffer_migrator.mlnx_reclaiming_unused_buffer()
559+
550560
def migrate(self):
551561
version = self.get_version()
552562
log.log_info('Upgrading from version ' + version)

scripts/mellanox_buffer_migrator.py

+232-1
Original file line numberDiff line numberDiff line change
@@ -79,15 +79,18 @@
7979
Not providing it means no buffer profile migration required.
8080
"""
8181
from sonic_py_common import logger
82+
import re
8283

8384
SYSLOG_IDENTIFIER = 'mellanox_buffer_migrator'
8485

8586
# Global logger instance
8687
log = logger.Logger(SYSLOG_IDENTIFIER)
8788

8889
class MellanoxBufferMigrator():
89-
def __init__(self, configDB):
90+
def __init__(self, configDB, appDB, stateDB):
9091
self.configDB = configDB
92+
self.appDB = appDB
93+
self.stateDB = stateDB
9194

9295
self.platform = None
9396
self.sku = None
@@ -834,3 +837,231 @@ def mlnx_flush_new_buffer_configuration(self):
834837

835838
def mlnx_is_buffer_model_dynamic(self):
    """Tell whether both conditions for the dynamic buffer model hold.

    Returns:
        ``self.is_buffer_config_default`` itself when it is falsy; otherwise
        the negation of ``self.is_msft_sku``.
    """
    uses_default_config = self.is_buffer_config_default
    if not uses_default_config:
        # Hand back the falsy value unchanged, exactly as `a and not b` would.
        return uses_default_config
    return not self.is_msft_sku
840+
841+
def mlnx_sort_buffer_tables(self, buffer_table, name):
    """Regroup a flat buffer table into a per-port mapping.

    Args:
        buffer_table: mapping whose keys are 2-element ``(port, ids)``
            sequences and whose values are the table items.
        name: table name; used only in the error log for invalid keys.

    Returns:
        dict mapping each port to ``{ids: item}`` for every valid key.
        Items with an invalid key are logged and skipped.
    """
    result = {}
    for key, item in buffer_table.items():
        # A plain string of length 2 would also satisfy a bare len() check and
        # be unpacked into two characters, so require a real 2-element sequence.
        if not isinstance(key, (tuple, list)) or len(key) != 2:
            log.log_error('Table {} contains invalid key {}, skip this item'.format(name, key))
            continue
        port, ids = key
        result.setdefault(port, {})[ids] = item

    return result
853+
854+
def mlnx_reclaiming_unused_buffer(self):
    """Migrate buffer objects of admin-down ports so their buffer can be reclaimed.

    Nothing is done when CONFIG_DB has no CABLE_LENGTH table.

    For every port whose ``admin_status`` is not ``up``:
      - BUFFER_PG: when the port has no PG configured, or only a ``3-4`` PG
        whose profile is ``NULL`` or matches the port's speed and cable
        length, a lossy PG ``0`` is written (and, for the dynamic model, PG
        ``3-4`` is set to the ``NULL`` profile).
      - BUFFER_QUEUE, BUFFER_PORT_INGRESS_PROFILE_LIST and
        BUFFER_PORT_EGRESS_PROFILE_LIST: entries are written only when the
        port has none.

    Dynamic model: the normal (default) profiles are referenced.
    Static model: zero profiles are referenced, and the zero pool and zero
    profiles are inserted into CONFIG_DB once at least one item was applied.

    Finally, for the dynamic model with warm reboot enabled, APPL_DB
    BUFFER_PG_TABLE entries of reclaimed ports are removed, followed by
    ``pg_lossless_*`` profiles in APPL_DB that are neither referenced by a
    remaining PG nor present in CONFIG_DB BUFFER_PROFILE.
    """
    cable_length_key = self.configDB.get_keys('CABLE_LENGTH')
    if not cable_length_key:
        log.log_notice("No cable length table defined, do not migrate buffer objects for reclaiming buffer")
        return

    log.log_info("Migrate buffer objects for reclaiming buffer based on 'CABLE_LENGTH|{}'".format(cable_length_key[0]))

    device_metadata = self.configDB.get_entry('DEVICE_METADATA', 'localhost')
    is_dynamic = (device_metadata.get('buffer_model') == 'dynamic')

    port_table = self.configDB.get_table('PORT')
    buffer_pool_table = self.configDB.get_table('BUFFER_POOL')
    buffer_profile_table = self.configDB.get_table('BUFFER_PROFILE')
    buffer_pg_table = self.configDB.get_table('BUFFER_PG')
    buffer_queue_table = self.configDB.get_table('BUFFER_QUEUE')
    buffer_ingress_profile_list_table = self.configDB.get_table('BUFFER_PORT_INGRESS_PROFILE_LIST')
    buffer_egress_profile_list_table = self.configDB.get_table('BUFFER_PORT_EGRESS_PROFILE_LIST')
    cable_length_entries = self.configDB.get_entry('CABLE_LENGTH', cable_length_key[0])

    buffer_pg_items = self.mlnx_sort_buffer_tables(buffer_pg_table, 'BUFFER_PG')
    buffer_queue_items = self.mlnx_sort_buffer_tables(buffer_queue_table, 'BUFFER_QUEUE')

    # Treated as double ingress pool only when ingress_lossy_profile really
    # points at a configured ingress_lossy_pool.
    single_pool = True
    if 'ingress_lossy_pool' in buffer_pool_table:
        ingress_lossy_profile = buffer_profile_table.get('ingress_lossy_profile')
        if ingress_lossy_profile:
            if '[BUFFER_POOL|ingress_lossy_pool]' == ingress_lossy_profile.get('pool'):
                single_pool = False

    # Construct buffer items to be applied to admin down ports
    if is_dynamic:
        # For dynamic model, we just need to add the default buffer objects to admin down ports
        # Buffer manager will apply zero profiles automatically when a port is shutdown
        lossy_pg_item = {'profile': '[BUFFER_PROFILE|ingress_lossy_profile]'} if 'ingress_lossy_profile' in buffer_profile_table else None
        lossy_queue_item = {'profile': '[BUFFER_PROFILE|q_lossy_profile]'} if 'q_lossy_profile' in buffer_profile_table else None
        lossless_queue_item = {'profile': '[BUFFER_PROFILE|egress_lossless_profile]'} if 'egress_lossless_profile' in buffer_profile_table else None

        # Only applied when both queue profiles above exist (see the guard in the port loop)
        queue_items_to_apply = {'0-2': {'profile': '[BUFFER_PROFILE|q_lossy_profile]'},
                                '3-4': {'profile': '[BUFFER_PROFILE|egress_lossless_profile]'},
                                '5-6': {'profile': '[BUFFER_PROFILE|q_lossy_profile]'}}

        if single_pool:
            if 'ingress_lossless_profile' in buffer_profile_table:
                ingress_profile_list_item = {'profile_list': '[BUFFER_PROFILE|ingress_lossless_profile]'}
            else:
                ingress_profile_list_item = None
        else:
            if 'ingress_lossless_profile' in buffer_profile_table and 'ingress_lossy_profile' in buffer_profile_table:
                ingress_profile_list_item = {'profile_list': '[BUFFER_PROFILE|ingress_lossless_profile],[BUFFER_PROFILE|ingress_lossy_profile]'}
            else:
                ingress_profile_list_item = None

        if 'egress_lossless_profile' in buffer_profile_table and 'egress_lossy_profile' in buffer_profile_table:
            egress_profile_list_item = {'profile_list': '[BUFFER_PROFILE|egress_lossless_profile],[BUFFER_PROFILE|egress_lossy_profile]'}
        else:
            egress_profile_list_item = None

        pools_to_insert = None
        profiles_to_insert = None

    else:
        # For static model, we need more.
        # Define zero buffer pools and profiles
        ingress_zero_pool = {'size': '0', 'mode': 'static', 'type': 'ingress'}
        ingress_lossy_pg_zero_profile = {
            "pool": "[BUFFER_POOL|ingress_zero_pool]",
            "size": "0",
            "static_th": "0"
        }
        lossy_pg_item = {'profile': '[BUFFER_PROFILE|ingress_lossy_pg_zero_profile]'}

        ingress_lossless_zero_profile = {
            "pool": "[BUFFER_POOL|ingress_lossless_pool]",
            "size": "0",
            "dynamic_th": "-8"
        }

        if single_pool:
            ingress_profile_list_item = {'profile_list': '[BUFFER_PROFILE|ingress_lossless_zero_profile]'}
        else:
            ingress_lossy_zero_profile = {
                "pool": "[BUFFER_POOL|ingress_lossy_pool]",
                "size": "0",
                "dynamic_th": "-8"
            }
            ingress_profile_list_item = {'profile_list': '[BUFFER_PROFILE|ingress_lossless_zero_profile],[BUFFER_PROFILE|ingress_lossy_zero_profile]'}

        egress_lossless_zero_profile = {
            "pool": "[BUFFER_POOL|egress_lossless_pool]",
            "size": "0",
            "dynamic_th": "-8"
        }
        lossless_queue_item = {'profile': '[BUFFER_PROFILE|egress_lossless_zero_profile]'}

        egress_lossy_zero_profile = {
            "pool": "[BUFFER_POOL|egress_lossy_pool]",
            "size": "0",
            "dynamic_th": "-8"
        }
        lossy_queue_item = {'profile': '[BUFFER_PROFILE|egress_lossy_zero_profile]'}
        egress_profile_list_item = {'profile_list': '[BUFFER_PROFILE|egress_lossless_zero_profile],[BUFFER_PROFILE|egress_lossy_zero_profile]'}

        queue_items_to_apply = {'0-2': {'profile': '[BUFFER_PROFILE|egress_lossy_zero_profile]'},
                                '3-4': {'profile': '[BUFFER_PROFILE|egress_lossless_zero_profile]'},
                                '5-6': {'profile': '[BUFFER_PROFILE|egress_lossy_zero_profile]'}}

        pools_to_insert = {'ingress_zero_pool': ingress_zero_pool}
        profiles_to_insert = {'ingress_lossy_pg_zero_profile': ingress_lossy_pg_zero_profile,
                              'ingress_lossless_zero_profile': ingress_lossless_zero_profile,
                              'egress_lossless_zero_profile': egress_lossless_zero_profile,
                              'egress_lossy_zero_profile': egress_lossy_zero_profile}
        if not single_pool:
            profiles_to_insert['ingress_lossy_zero_profile'] = ingress_lossy_zero_profile

    # Raw string: '\[', '\|' and '\]' are regex escapes, not Python string escapes.
    # Compiled once because it is used for every admin-down port.
    lossless_profile_pattern = re.compile(r'\[BUFFER_PROFILE\|pg_lossless_([1-9][0-9]*000)_([1-9][0-9]*m)_profile\]')
    zero_item_count = 0
    reclaimed_ports = set()
    for port_name, port_info in port_table.items():
        if port_info.get('admin_status') == 'up':
            # Handles admin down ports only
            continue

        if lossy_pg_item:
            port_pgs = buffer_pg_items.get(port_name)
            is_default = False
            if not port_pgs:
                is_default = True
            elif set(port_pgs) == {'3-4'}:
                if is_dynamic:
                    reclaimed_ports.add(port_name)
                # .get(): an entry without a 'profile' field is simply not default
                lossless_profile = port_pgs['3-4'].get('profile')
                if lossless_profile == 'NULL':
                    is_default = True
                elif lossless_profile:
                    match = lossless_profile_pattern.search(lossless_profile)
                    if match:
                        speed, cable_length = match.groups()
                        if speed == port_info.get('speed') and cable_length == cable_length_entries.get(port_name):
                            is_default = True

            if is_default:
                lossy_pg_key = '{}|0'.format(port_name)
                lossless_pg_key = '{}|3-4'.format(port_name)
                self.configDB.set_entry('BUFFER_PG', lossy_pg_key, lossy_pg_item)
                if is_dynamic:
                    self.configDB.set_entry('BUFFER_PG', lossless_pg_key, {'profile': 'NULL'})
                    # For traditional model, we must NOT remove the default lossless PG
                    # because it has been propagated to APPL_DB during db_migrator
                    # Leaving it untouched in CONFIG_DB enables traditional buffer manager to
                    # remove it from CONFIG_DB as well as APPL_DB
                    # However, removing it from CONFIG_DB causes it left in APPL_DB
                zero_item_count += 1

        if lossy_queue_item and lossless_queue_item:
            port_queues = buffer_queue_items.get(port_name)
            if not port_queues:
                for ids, item in queue_items_to_apply.items():
                    self.configDB.set_entry('BUFFER_QUEUE', port_name + '|' + ids, item)
                zero_item_count += 1

        if ingress_profile_list_item:
            port_ingress_profile_list = buffer_ingress_profile_list_table.get(port_name)
            if not port_ingress_profile_list:
                self.configDB.set_entry('BUFFER_PORT_INGRESS_PROFILE_LIST', port_name, ingress_profile_list_item)
                zero_item_count += 1

        if egress_profile_list_item:
            port_egress_profile_list = buffer_egress_profile_list_table.get(port_name)
            if not port_egress_profile_list:
                self.configDB.set_entry('BUFFER_PORT_EGRESS_PROFILE_LIST', port_name, egress_profile_list_item)
                zero_item_count += 1

    # Zero pools/profiles are only needed once something references them
    if zero_item_count > 0:
        if pools_to_insert:
            for name, pool in pools_to_insert.items():
                self.configDB.set_entry('BUFFER_POOL', name, pool)

        if profiles_to_insert:
            for name, profile in profiles_to_insert.items():
                self.configDB.set_entry('BUFFER_PROFILE', name, profile)

    # We need to remove BUFFER_PG table items for admin down ports from APPL_DB
    # and then remove the buffer profiles which are no longer referenced
    # We do it here because
    #  - The buffer profiles were copied from CONFIG_DB by db_migrator when the database was being migrated from 1.0.6 to 2.0.0
    #  - In this migrator the buffer priority-groups have been removed from CONFIG_DB.BUFFER_PG table
    #  - The dynamic buffer manager will not generate buffer profile by those buffer PG items
    #    In case a buffer profile was referenced by an admin down port only, the dynamic buffer manager won't create it after starting
    #    This kind of buffer profiles will be left in APPL_DB and can not be removed.
    if not is_dynamic:
        return

    warmreboot_state = self.stateDB.get(self.stateDB.STATE_DB, 'WARM_RESTART_ENABLE_TABLE|system', 'enable')
    if warmreboot_state != 'true':
        return

    keys = self.appDB.keys(self.appDB.APPL_DB, "BUFFER_PG_TABLE:*")
    if keys is None:
        return

    # The profile name is taken by dropping this prefix and the last character.
    # Presumably references look like '[BUFFER_PROFILE_TABLE:<name>]' - confirm against APPL_DB.
    profile_reference_prefix = '[BUFFER_PROFILE_TABLE:'
    referenced_profiles = set()
    for buffer_pg_key in keys:
        key_parts = buffer_pg_key.split(':')
        if len(key_parts) == 3:
            port = key_parts[1]
        else:
            # Never delete on the basis of a key we can not parse; its profile is still
            # recorded as referenced below so that nothing it points at gets removed.
            log.log_error('Table BUFFER_PG_TABLE in APPL_DB contains invalid key {}, leave it untouched'.format(buffer_pg_key))
            port = None

        if port in reclaimed_ports:
            self.appDB.delete(self.appDB.APPL_DB, buffer_pg_key)
        else:
            pg_fields = self.appDB.get_all(self.appDB.APPL_DB, buffer_pg_key) or {}
            profile = pg_fields.get('profile')
            if profile:
                referenced_profiles.add(profile[len(profile_reference_prefix):-1])

    # keys() is checked for None above, so tolerate None here as well
    keys = self.appDB.keys(self.appDB.APPL_DB, "BUFFER_PROFILE_TABLE:*") or []
    for buffer_profile_key in keys:
        profile = buffer_profile_key.split(':')[1]
        if profile.startswith('pg_lossless_') and profile not in referenced_profiles and profile not in buffer_profile_table:
            self.appDB.delete(self.appDB.APPL_DB, buffer_profile_key)

tests/db_migrator_input/appl_db/acs-msn4700-t1-version_2_0_0.json

-7
Original file line numberDiff line numberDiff line change
@@ -712,13 +712,6 @@
712712
"pool": "[BUFFER_POOL_TABLE:ingress_lossless_pool]",
713713
"size": "124928"
714714
},
715-
"BUFFER_PROFILE_TABLE:pg_lossless_400000_300m_profile": {
716-
"xon": "37888",
717-
"dynamic_th": "0",
718-
"xoff": "373760",
719-
"pool": "[BUFFER_POOL_TABLE:ingress_lossless_pool]",
720-
"size": "420864"
721-
},
722715
"BUFFER_PROFILE_TABLE:q_lossy_profile": {
723716
"dynamic_th": "3",
724717
"pool": "[BUFFER_POOL_TABLE:egress_lossy_pool]",

0 commit comments

Comments
 (0)