Skip to content

Commit b71e507

Browse files
Merge remote-tracking branch 'origin/master' into HEAD
2 parents 3437e35 + f9af7ae commit b71e507

File tree

5 files changed

+395
-31
lines changed

5 files changed

+395
-31
lines changed

scripts/determine-reboot-cause

+51-17
Original file line numberDiff line numberDiff line change
@@ -122,7 +122,12 @@ def find_hardware_reboot_cause():
122122
else:
123123
sonic_logger.log_info("No reboot cause found from platform api")
124124

125-
hardware_reboot_cause = "{} ({})".format(hardware_reboot_cause_major, hardware_reboot_cause_minor)
125+
hardware_reboot_cause_minor_str = ""
126+
if hardware_reboot_cause_minor:
127+
hardware_reboot_cause_minor_str = " ({})".format(hardware_reboot_cause_minor)
128+
129+
hardware_reboot_cause = hardware_reboot_cause_major + hardware_reboot_cause_minor_str
130+
126131
return hardware_reboot_cause
127132

128133

@@ -158,6 +163,50 @@ def get_reboot_cause_dict(previous_reboot_cause, comment, gen_time):
158163

159164
return reboot_cause_dict
160165

166+
def determine_reboot_cause():
    """Decide which of the collected reboot causes describes the last reboot.

    Three candidates are gathered, in this order: the /proc/cmdline cause,
    the hardware cause reported by the platform API, and the software cause.
    They are then combined as follows:

    * If the hardware cause does not contain REBOOT_CAUSE_NON_HARDWARE, the
      hardware cause wins. A known software cause (or, failing that, the
      /proc/cmdline cause) is reported as the additional info.
    * Otherwise, if /proc/cmdline indicates a cause, the software cause wins
      when it is known; if it is unknown the /proc/cmdline cause is used.
    * Otherwise the software cause is used as-is.

    Returns:
        tuple: ``(previous_reboot_cause, additional_reboot_info)``.
        ``additional_reboot_info`` is "N/A" when nothing extra was found.
    """
    # Extra context that accompanies the main cause; the caller stores it as
    # a "comment" in REBOOT_CAUSE_HISTORY_FILE.
    additional_reboot_info = "N/A"

    # 1. Check if the previous reboot was warm/fast reboot by testing whether
    #    there is "fast|fastfast|warm" in /proc/cmdline
    proc_cmdline_reboot_cause = find_proc_cmdline_reboot_cause()

    # 2. Check if the previous reboot was caused by hardware
    hardware_reboot_cause = find_hardware_reboot_cause()

    # 3. If there is a REBOOT_CAUSE_FILE, it will contain any
    #    software-related reboot info.
    software_reboot_cause = find_software_reboot_cause()

    # Compare by value, not identity: a cause string that merely equals
    # REBOOT_CAUSE_UNKNOWN (without being that very object) must still be
    # treated as unknown.
    software_cause_known = software_reboot_cause != REBOOT_CAUSE_UNKNOWN
    cmdline_cause_found = proc_cmdline_reboot_cause is not None

    if REBOOT_CAUSE_NON_HARDWARE not in hardware_reboot_cause:
        # A valid hardware reboot cause takes precedence.
        previous_reboot_cause = hardware_reboot_cause
        # Record any software/cmdline reboot that was issued before this
        # hardware reboot happened as the additional reboot cause.
        if software_cause_known:
            additional_reboot_info = software_reboot_cause
        elif cmdline_cause_found:
            additional_reboot_info = proc_cmdline_reboot_cause
    elif cmdline_cause_found:
        # Prefer the more specific software cause; fall back to the cmdline
        # cause only when the software cause is unknown.
        if software_cause_known:
            previous_reboot_cause = software_reboot_cause
        else:
            previous_reboot_cause = proc_cmdline_reboot_cause
    else:
        previous_reboot_cause = software_reboot_cause

    return previous_reboot_cause, additional_reboot_info
209+
161210

162211
def main():
163212
# Configure logger to log all messages INFO level and higher
@@ -177,22 +226,7 @@ def main():
177226
if os.path.exists(PREVIOUS_REBOOT_CAUSE_FILE):
178227
os.remove(PREVIOUS_REBOOT_CAUSE_FILE)
179228

180-
# This variable is kept for future-use purpose. When proc_cmd_line/vendor/software provides
181-
# any additional_reboot_info it will be stored as a "comment" in REBOOT_CAUSE_HISTORY_FILE
182-
additional_reboot_info = "N/A"
183-
184-
# Check if the previous reboot was warm/fast reboot by testing whether there is "fast|fastfast|warm" in /proc/cmdline
185-
proc_cmdline_reboot_cause = find_proc_cmdline_reboot_cause()
186-
187-
# If /proc/cmdline does not indicate reboot cause, check if the previous reboot was caused by hardware
188-
if proc_cmdline_reboot_cause is None:
189-
previous_reboot_cause = find_hardware_reboot_cause()
190-
if previous_reboot_cause.startswith(REBOOT_CAUSE_NON_HARDWARE):
191-
# If the reboot cause is non-hardware, get the reboot cause from REBOOT_CAUSE_FILE
192-
previous_reboot_cause = find_software_reboot_cause()
193-
else:
194-
# Get the reboot cause from REBOOT_CAUSE_FILE
195-
previous_reboot_cause = find_software_reboot_cause()
229+
previous_reboot_cause, additional_reboot_info = determine_reboot_cause()
196230

197231
# Current time
198232
reboot_cause_gen_time = str(datetime.datetime.now().strftime('%Y_%m_%d_%H_%M_%S'))

scripts/hostcfgd

+166-7
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ import re
1212
import jinja2
1313
from sonic_py_common import device_info
1414
from swsscommon.swsscommon import ConfigDBConnector, DBConnector, Table
15+
from swsscommon import swsscommon
1516

1617
# FILE
1718
PAM_AUTH_CONF = "/etc/pam.d/common-auth-sonic"
@@ -1253,6 +1254,143 @@ class PamLimitsCfg(object):
12531254
"modify pam_limits config file failed with exception: {}"
12541255
.format(e))
12551256

1257+
class DeviceMetaCfg(object):
    """
    DeviceMetaCfg Config Daemon
    Handles changes in DEVICE_METADATA table.
        1) Handle hostname change
    """

    def __init__(self):
        # Hostname most recently loaded or successfully applied.
        self.hostname = ''

    def load(self, dev_meta=None):
        """Cache the initial hostname.

        Args:
            dev_meta: DEVICE_METADATA table content; the hostname is read
                from ``dev_meta['localhost']['hostname']``. Missing keys
                (or ``None``) result in an empty hostname.
        """
        dev_meta = dev_meta or {}
        self.hostname = dev_meta.get('localhost', {}).get('hostname', '')
        syslog.syslog(syslog.LOG_DEBUG, f'Initial hostname: {self.hostname}')

    def hostname_update(self, data):
        """
        Apply hostname handler.

        Args:
            data: Read table's key's data.
        """
        syslog.syslog(syslog.LOG_DEBUG, 'DeviceMetaCfg: hostname update')
        new_hostname = data.get('hostname')

        # Empty not allowed
        if not new_hostname:
            syslog.syslog(syslog.LOG_ERR,
                          'Hostname was not updated: Empty not allowed')
            return

        # Nothing to do when the hostname did not change. This is a normal
        # no-op, so it is not reported as an error.
        if new_hostname == self.hostname:
            syslog.syslog(syslog.LOG_INFO,
                          'Hostname was not updated: Already set up')
            return

        # Restart hostname-config service when hostname was changed.
        syslog.syslog(syslog.LOG_INFO, 'DeviceMetaCfg: Set new hostname: {}'
                                       .format(new_hostname))
        try:
            # NOTE(review): the two positional True flags are presumably
            # "log errors" and "raise on failure" -- confirm against run_cmd.
            run_cmd('sudo service hostname-config restart', True, True)
        except subprocess.CalledProcessError as e:
            syslog.syslog(syslog.LOG_ERR, 'DeviceMetaCfg: Failed to set new'
                                          ' hostname: {}'.format(e))
            return

        # Update the cache only after the restart succeeded, so that a failed
        # attempt can be retried with the same hostname.
        self.hostname = new_hostname

        run_cmd('sudo monit reload')
1300+
1301+
1302+
class MgmtIfaceCfg(object):
    """
    MgmtIfaceCfg Config Daemon
    Handles changes in MGMT_INTERFACE, MGMT_VRF_CONFIG tables.
        1) Handle change of interface ip
        2) Handle change of management VRF state
    """

    def __init__(self):
        # Last applied management interface config, keyed by table key.
        self.iface_config_data = {}
        # Last applied value of the 'mgmtVrfEnabled' field.
        self.mgmt_vrf_enabled = ''

    def load(self, mgmt_iface=None, mgmt_vrf=None):
        """Cache the initial management interface and VRF configuration.

        Args:
            mgmt_iface: Initial management interface table content.
            mgmt_vrf: Initial management VRF data; only the
                'mgmtVrfEnabled' field is read.
        """
        # Keep a private copy: update_mgmt_iface() mutates this cache, which
        # must not leak into the caller's dict or into other instances.
        self.iface_config_data = dict(mgmt_iface) if mgmt_iface else {}
        self.mgmt_vrf_enabled = (mgmt_vrf or {}).get('mgmtVrfEnabled', '')
        syslog.syslog(syslog.LOG_DEBUG,
                      f'Initial mgmt interface conf: {self.iface_config_data}')
        syslog.syslog(syslog.LOG_DEBUG,
                      f'Initial mgmt VRF state: {self.mgmt_vrf_enabled}')

    def update_mgmt_iface(self, iface, key, data):
        """Handle update management interface config

        Args:
            iface: Interface name, used for logging only.
            key: Table key the update belongs to.
            data: New data for that key.
        """
        syslog.syslog(syslog.LOG_DEBUG, 'MgmtIfaceCfg: mgmt iface update')

        # Nothing to do when the config for this key did not change.
        if data == self.iface_config_data.get(key):
            return

        # Restart management interface service when config was changed
        cfg = {key: data}
        syslog.syslog(syslog.LOG_INFO, f'MgmtIfaceCfg: Set new interface '
                                       f'config {cfg} for {iface}')
        try:
            run_cmd('sudo systemctl restart interfaces-config', True, True)
            run_cmd('sudo systemctl restart ntp-config', True, True)
        except subprocess.CalledProcessError:
            syslog.syslog(syslog.LOG_ERR, 'Failed to restart management '
                                          'interface services')
            return

        # Update cache only after the services restarted successfully.
        self.iface_config_data[key] = data

    def update_mgmt_vrf(self, data):
        """Handle update management VRF state

        Args:
            data: Table data; only the 'mgmtVrfEnabled' field is read.
        """
        syslog.syslog(syslog.LOG_DEBUG, 'MgmtIfaceCfg: mgmt vrf state update')

        # Restart mgmt vrf services when mgmt vrf config was changed.
        # Empty not allowed.
        enabled = data.get('mgmtVrfEnabled', '')
        if not enabled or enabled == self.mgmt_vrf_enabled:
            return

        syslog.syslog(syslog.LOG_INFO, f'Set mgmt vrf state {enabled}')

        # Restart related vrfs services
        try:
            run_cmd('service ntp stop', True, True)
            run_cmd('systemctl restart interfaces-config', True, True)
            run_cmd('service ntp start', True, True)
        except subprocess.CalledProcessError:
            syslog.syslog(syslog.LOG_ERR, 'Failed to restart management vrf '
                                          'services')
            return

        # Remove mgmt if route
        if enabled == 'true':
            # The regular expression for grep in below cmd is to match eth0
            # line in /proc/net/route, sample file:
            # $ cat /proc/net/route
            #     Iface   Destination     Gateway         Flags   RefCnt  Use
            #     eth0    00000000        01803B0A        0003    0       0
            #     #################### Line break here ####################
            #     Metric  Mask            MTU     Window  IRTT
            #     202     00000000        0       0       0
            #
            # NOTE(review): the output of this pipeline is not inspected
            # here, so the route delete below runs whenever the command does
            # not raise -- confirm that this is the intended check.
            try:
                run_cmd(r"""cat /proc/net/route | grep -E \"eth0\s+"""
                        r"""00000000\s+[0-9A-Z]+\s+[0-9]+\s+[0-9]+\s+[0-9]+"""
                        r"""\s+202\" | wc -l""",
                        True, True)
            except subprocess.CalledProcessError:
                syslog.syslog(syslog.LOG_ERR, 'MgmtIfaceCfg: Could not delete '
                                              'eth0 route')
                return

            run_cmd("ip -4 route del default dev eth0 metric 202", False)

        # Update cache
        self.mgmt_vrf_enabled = enabled
1392+
1393+
12561394
class HostConfigDaemon:
12571395
def __init__(self):
12581396
# Just a sanity check to verify if the CONFIG_DB has been initialized
@@ -1284,7 +1422,6 @@ class HostConfigDaemon:
12841422
self.is_multi_npu = device_info.is_multi_npu()
12851423

12861424
# Initialize AAACfg
1287-
self.hostname_cache=""
12881425
self.aaacfg = AaaCfg()
12891426

12901427
# Initialize PasswHardening
@@ -1294,6 +1431,12 @@ class HostConfigDaemon:
12941431
self.pamLimitsCfg = PamLimitsCfg(self.config_db)
12951432
self.pamLimitsCfg.update_config_file()
12961433

1434+
# Initialize DeviceMetaCfg
1435+
self.devmetacfg = DeviceMetaCfg()
1436+
1437+
# Initialize MgmtIfaceCfg
1438+
self.mgmtifacecfg = MgmtIfaceCfg()
1439+
12971440
def load(self, init_data):
12981441
features = init_data['FEATURE']
12991442
aaa = init_data['AAA']
@@ -1306,21 +1449,21 @@ class HostConfigDaemon:
13061449
ntp_global = init_data['NTP']
13071450
kdump = init_data['KDUMP']
13081451
passwh = init_data['PASSW_HARDENING']
1452+
dev_meta = init_data.get(swsscommon.CFG_DEVICE_METADATA_TABLE_NAME, {})
1453+
mgmt_ifc = init_data.get(swsscommon.CFG_MGMT_INTERFACE_TABLE_NAME, {})
1454+
mgmt_vrf = init_data.get(swsscommon.CFG_MGMT_VRF_CONFIG_TABLE_NAME, {})
13091455

13101456
self.feature_handler.sync_state_field(features)
13111457
self.aaacfg.load(aaa, tacacs_global, tacacs_server, radius_global, radius_server)
13121458
self.iptables.load(lpbk_table)
13131459
self.ntpcfg.load(ntp_global, ntp_server)
13141460
self.kdumpCfg.load(kdump)
13151461
self.passwcfg.load(passwh)
1316-
1317-
dev_meta = self.config_db.get_table('DEVICE_METADATA')
1318-
if 'localhost' in dev_meta:
1319-
if 'hostname' in dev_meta['localhost']:
1320-
self.hostname_cache = dev_meta['localhost']['hostname']
1462+
self.devmetacfg.load(dev_meta)
1463+
self.mgmtifacecfg.load(mgmt_ifc, mgmt_vrf)
13211464

13221465
# Update AAA with the hostname
1323-
self.aaacfg.hostname_update(self.hostname_cache)
1466+
self.aaacfg.hostname_update(self.devmetacfg.hostname)
13241467

13251468
def __get_intf_name(self, key):
13261469
if isinstance(key, tuple) and key:
@@ -1370,6 +1513,10 @@ class HostConfigDaemon:
13701513
mgmt_intf_name = self.__get_intf_name(key)
13711514
self.aaacfg.handle_radius_source_intf_ip_chg(mgmt_intf_name)
13721515
self.aaacfg.handle_radius_nas_ip_chg(mgmt_intf_name)
1516+
self.mgmtifacecfg.update_mgmt_iface(mgmt_intf_name, key, data)
1517+
1518+
def mgmt_vrf_handler(self, key, op, data):
1519+
self.mgmtifacecfg.update_mgmt_vrf(data)
13731520

13741521
def lpbk_handler(self, key, op, data):
13751522
key = ConfigDBConnector.deserialize_key(key)
@@ -1409,6 +1556,10 @@ class HostConfigDaemon:
14091556
syslog.syslog(syslog.LOG_INFO, 'Kdump handler...')
14101557
self.kdumpCfg.kdump_update(key, data)
14111558

1559+
def device_metadata_handler(self, key, op, data):
1560+
syslog.syslog(syslog.LOG_INFO, 'DeviceMeta handler...')
1561+
self.devmetacfg.hostname_update(data)
1562+
14121563
def wait_till_system_init_done(self):
14131564
# No need to print the output in the log file so using the "--quiet"
14141565
# flag
@@ -1448,6 +1599,14 @@ class HostConfigDaemon:
14481599
self.config_db.subscribe('PORTCHANNEL_INTERFACE', make_callback(self.portchannel_intf_handler))
14491600
self.config_db.subscribe('INTERFACE', make_callback(self.phy_intf_handler))
14501601

1602+
# Handle DEVICE_METADATA changes
1603+
self.config_db.subscribe(swsscommon.CFG_DEVICE_METADATA_TABLE_NAME,
1604+
make_callback(self.device_metadata_handler))
1605+
1606+
# Handle MGMT_VRF_CONFIG changes
1607+
self.config_db.subscribe(swsscommon.CFG_MGMT_VRF_CONFIG_TABLE_NAME,
1608+
make_callback(self.mgmt_vrf_handler))
1609+
14511610
syslog.syslog(syslog.LOG_INFO,
14521611
"Waiting for systemctl to finish initialization")
14531612
self.wait_till_system_init_done()

0 commit comments

Comments
 (0)