Skip to content

Commit cc938e7

Browse files
authored
Dynamic port configuration - solve lldp issues when adding/removing ports (#9386)
#### Why I did it When adding and removing ports after the init stage, we saw two issues. First: in several cases, after removing a port, lldpmgr keeps trying to add the port to LLDP with the lldpcli command. The command keeps failing because the port no longer exists. Second: after adding a port, we sometimes see this warning message: "Command failed 'lldpcli configure ports Ethernet18 lldp portidsubtype local etp5b': 2021-07-27T14:16:54 [WARN/lldpctl] cannot find port Ethernet18". We added these changes in order to solve both issues. #### How I did it Port create events are taken from the App DB only. The lldpcli command is executed only when the Linux port is up. When a delete port event is received, we remove the port's command from the pending_cmds dictionary. #### How to verify it Manual tests and running the LLDP tests. #### Description for the changelog Dynamic port configuration - solve lldp issues when adding/removing ports
1 parent 9282618 commit cc938e7

File tree

1 file changed

+54
-62
lines changed

1 file changed

+54
-62
lines changed

dockers/docker-lldp/lldpmgrd

+54-62
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,8 @@ VERSION = "1.0"
3030

3131
SYSLOG_IDENTIFIER = "lldpmgrd"
3232
PORT_INIT_TIMEOUT = 300
33+
FAILED_CMD_TIMEOUT = 6
34+
RETRY_LIMIT = 5
3335

3436

3537
class LldpManager(daemon_base.DaemonBase):
@@ -41,7 +43,8 @@ class LldpManager(daemon_base.DaemonBase):
4143
state_db: Handle to Redis State database via swsscommon lib
4244
config_db: Handle to Redis Config database via swsscommon lib
4345
pending_cmds: Dictionary where key is port name, value is pending
44-
LLDP configuration command to run
46+
LLDP configuration command to run
47+
and the last timestamp at which this command failed (used for the retry mechanism)
4548
"""
4649
REDIS_TIMEOUT_MS = 0
4750

@@ -58,6 +61,11 @@ class LldpManager(daemon_base.DaemonBase):
5861
self.REDIS_TIMEOUT_MS,
5962
False)
6063

64+
# Open a handle to the State database
65+
self.state_db = swsscommon.DBConnector("STATE_DB",
66+
self.REDIS_TIMEOUT_MS,
67+
False)
68+
6169
self.pending_cmds = {}
6270
self.hostname = "None"
6371
self.mgmt_ip = "None"
@@ -66,6 +74,7 @@ class LldpManager(daemon_base.DaemonBase):
6674
self.port_table = swsscommon.Table(self.config_db, swsscommon.CFG_PORT_TABLE_NAME)
6775
self.mgmt_table = swsscommon.Table(self.config_db, swsscommon.CFG_MGMT_INTERFACE_TABLE_NAME)
6876
self.app_port_table = swsscommon.Table(self.appl_db, swsscommon.APP_PORT_TABLE_NAME)
77+
self.state_port_table = swsscommon.Table(self.state_db, swsscommon.STATE_PORT_TABLE_NAME)
6978

7079
def update_hostname(self, hostname):
7180
cmd = "lldpcli configure system hostname {0}".format(hostname)
@@ -99,32 +108,25 @@ class LldpManager(daemon_base.DaemonBase):
99108

100109
def is_port_up(self, port_name):
101110
"""
102-
Determine if a port is up or down by looking into the oper-status for the port in
103-
PORT TABLE in the Application DB
111+
Determine if a port is up or down by looking into the netdev_oper_status for the port in
112+
PORT TABLE in the State DB
104113
"""
105114
# Retrieve all entries for this port from the Port table
106-
(status, fvp) = self.app_port_table.get(port_name)
115+
(status, fvp) = self.state_port_table.get(port_name)
107116
if status:
108117
# Convert list of tuples to a dictionary
109118
port_table_dict = dict(fvp)
110119

111120
# Get the oper-status for the port
112-
if "oper_status" in port_table_dict:
113-
port_oper_status = port_table_dict.get("oper_status")
114-
self.log_info("Port name {} oper status: {}".format(port_name, port_oper_status))
121+
if "netdev_oper_status" in port_table_dict:
122+
port_oper_status = port_table_dict.get("netdev_oper_status")
115123
return port_oper_status == "up"
116124
else:
117125
return False
118126
else:
119-
# Retrieve PortInitDone entry from the Port table
120-
(init_status, init_fvp) = self.port_table.get("PortInitDone")
121-
# The initialization procedure is done, but don't have this port entry
122-
if init_status:
123-
self.log_error("Port '{}' not found in {} table in App DB".format(
124-
port_name, swsscommon.APP_PORT_TABLE_NAME))
125127
return False
126128

127-
def generate_pending_lldp_config_cmd_for_port(self, port_name):
129+
def generate_pending_lldp_config_cmd_for_port(self, port_name, port_table_dict):
128130
"""
129131
For port `port_name`, look up the description and alias in the Config database,
130132
then form the appropriate lldpcli configuration command and run it.
@@ -135,27 +137,16 @@ class LldpManager(daemon_base.DaemonBase):
135137
# asic-to-asic communication in VOQ based chassis system. We do not configure LLDP on these.
136138
if port_name.startswith(inband_prefix()):
137139
return
138-
139-
# Retrieve all entires for this port from the Port table
140-
(status, fvp) = self.port_table.get(port_name)
141-
if status:
142-
# Convert list of tuples to a dictionary
143-
port_table_dict = dict(fvp)
144-
145-
# Get the port alias. If None or empty string, use port name instead
146-
port_alias = port_table_dict.get("alias")
147-
if not port_alias:
148-
self.log_info("Unable to retrieve port alias for port '{}'. Using port name instead.".format(port_name))
149-
port_alias = port_name
150-
151-
# Get the port description. If None or empty string, we'll skip this configuration
152-
port_desc = port_table_dict.get("description")
153-
154-
else:
155-
self.log_error("Port '{}' not found in {} table in Config DB. Using port name instead of port alias.".format(
156-
port_name, swsscommon.CFG_PORT_TABLE_NAME))
140+
141+
# Get the port alias. If None or empty string, use port name instead
142+
port_alias = port_table_dict.get("alias")
143+
if not port_alias:
144+
self.log_info("Unable to retrieve port alias for port '{}'. Using port name instead.".format(port_name))
157145
port_alias = port_name
158-
146+
147+
# Get the port description. If None or empty string, we'll skip this configuration
148+
port_desc = port_table_dict.get("description")
149+
159150
lldpcli_cmd = "lldpcli configure ports {0} lldp portidsubtype local {1}".format(port_name, port_alias)
160151

161152
# if there is a description available, also configure that
@@ -166,17 +157,25 @@ class LldpManager(daemon_base.DaemonBase):
166157

167158
# Add the command to our dictionary of pending commands, overwriting any
168159
# previous pending command for this port
169-
self.pending_cmds[port_name] = lldpcli_cmd
160+
self.pending_cmds[port_name] = { 'cmd': lldpcli_cmd, 'failed_count': 0}
170161

171162
def process_pending_cmds(self):
172163
# List of port names (keys of elements) to delete from self.pending_cmds
173164
to_delete = []
174165

175-
for (port_name, cmd) in self.pending_cmds.items():
176-
self.log_debug("Running command: '{}'".format(cmd))
166+
for (port_name, port_item) in self.pending_cmds.items():
167+
cmd = port_item['cmd']
177168

178-
rc, stderr = run_cmd(self, cmd)
169+
# check if linux port is up
170+
if not self.is_port_up(port_name):
171+
self.log_info("port %s is not up, continue"%port_name)
172+
continue
173+
174+
if 'failed_timestamp' in port_item and time.time()-port_item['failed_timestamp']<FAILED_CMD_TIMEOUT:
175+
continue
179176

177+
self.log_debug("Running command: '{}'".format(cmd))
178+
rc, stderr = run_cmd(self, cmd)
180179
# If the command succeeds, add the port name to our to_delete list.
181180
# We will delete this command from self.pending_cmds below.
182181
# If the command fails, log a message, but don't delete the command
@@ -185,8 +184,15 @@ class LldpManager(daemon_base.DaemonBase):
185184
if rc == 0:
186185
to_delete.append(port_name)
187186
else:
188-
self.log_warning("Command failed '{}': {}".format(cmd, stderr))
189-
187+
if port_item['failed_count'] >= RETRY_LIMIT:
188+
self.log_error("Command failed '{}': {} - command was failed {} times, disabling retry".format(cmd, stderr, RETRY_LIMIT+1))
189+
# not retrying again
190+
to_delete.append(port_name)
191+
else:
192+
self.pending_cmds[port_name]['failed_count'] += 1
193+
self.pending_cmds[port_name]['failed_timestamp'] = time.time()
194+
self.log_info("Command failed '{}': {} - cmd failed {} times, retrying again".format(cmd, stderr, self.pending_cmds[port_name]['failed_count']))
195+
190196
# Delete all successful commands from self.pending_cmds
191197
for port_name in to_delete:
192198
self.pending_cmds.pop(port_name, None)
@@ -268,10 +274,6 @@ class LldpManager(daemon_base.DaemonBase):
268274

269275
sel = swsscommon.Select()
270276

271-
# Subscribe to PORT table notifications in the Config DB
272-
sst_confdb = swsscommon.SubscriberStateTable(self.config_db, swsscommon.CFG_PORT_TABLE_NAME)
273-
sel.addSelectable(sst_confdb)
274-
275277
# Subscribe to PORT table notifications in the App DB
276278
sst_appdb = swsscommon.SubscriberStateTable(self.appl_db, swsscommon.APP_PORT_TABLE_NAME)
277279
sel.addSelectable(sst_appdb)
@@ -289,17 +291,6 @@ class LldpManager(daemon_base.DaemonBase):
289291
(state, c) = sel.select(SELECT_TIMEOUT_MS)
290292

291293
if state == swsscommon.Select.OBJECT:
292-
(key, op, fvp) = sst_confdb.pop()
293-
if fvp:
294-
fvp_dict = dict(fvp)
295-
296-
# handle config change
297-
if ("alias" in fvp_dict or "description" in fvp_dict) and (op in ["SET", "DEL"]):
298-
if self.is_port_up(key):
299-
self.generate_pending_lldp_config_cmd_for_port(key)
300-
else:
301-
self.pending_cmds.pop(key, None)
302-
303294
(key, op, fvp) = sst_mgmt_ip_confdb.pop()
304295
if key:
305296
self.lldp_process_mgmt_info_change(op, dict(fvp), key)
@@ -310,15 +301,16 @@ class LldpManager(daemon_base.DaemonBase):
310301

311302
(key, op, fvp) = sst_appdb.pop()
312303
if (key != "PortInitDone") and (key != "PortConfigDone"):
313-
if fvp:
314-
fvp_dict = dict(fvp)
315-
316-
# handle port status change
317-
if "oper_status" in fvp_dict:
318-
if "up" in fvp_dict.get("oper_status"):
319-
self.generate_pending_lldp_config_cmd_for_port(key)
304+
if op == "SET":
305+
if fvp:
306+
if "up" in dict(fvp).get("oper_status",""):
307+
self.generate_pending_lldp_config_cmd_for_port(key, dict(fvp))
320308
else:
321309
self.pending_cmds.pop(key, None)
310+
elif op == "DEL":
311+
self.pending_cmds.pop(key, None)
312+
else:
313+
self.log_error("unknown operation")
322314

323315
elif key == "PortInitDone":
324316
port_init_done = True

0 commit comments

Comments
 (0)