Skip to content

Commit 70ce6a3

Browse files
authored
Merge pull request #10 from sujinmkang/cold_reset
Add hardware reboot cause as actual reboot cause when soft reboot fails
2 parents f6ea036 + 8720561 commit 70ce6a3

File tree

2 files changed

+112
-19
lines changed

2 files changed

+112
-19
lines changed

scripts/determine-reboot-cause

+51-17
Original file line numberDiff line numberDiff line change
@@ -122,7 +122,12 @@ def find_hardware_reboot_cause():
122122
else:
123123
sonic_logger.log_info("No reboot cause found from platform api")
124124

125-
hardware_reboot_cause = "{} ({})".format(hardware_reboot_cause_major, hardware_reboot_cause_minor)
125+
hardware_reboot_cause_minor_str = ""
126+
if hardware_reboot_cause_minor:
127+
hardware_reboot_cause_minor_str = " ({})".format(hardware_reboot_cause_minor)
128+
129+
hardware_reboot_cause = hardware_reboot_cause_major + hardware_reboot_cause_minor_str
130+
126131
return hardware_reboot_cause
127132

128133

@@ -158,6 +163,50 @@ def get_reboot_cause_dict(previous_reboot_cause, comment, gen_time):
158163

159164
return reboot_cause_dict
160165

166+
def determine_reboot_cause():
    """Work out the previous reboot cause and any supplementary reboot info.

    Three sources are consulted: the /proc/cmdline reboot cause, the hardware
    reboot cause reported by the platform API, and the software reboot cause
    recorded in REBOOT_CAUSE_FILE.

    Decision logic:
      * If the hardware cause is a real hardware cause (it does not contain
        REBOOT_CAUSE_NON_HARDWARE), it is the reboot cause. A known software
        cause, or failing that the cmdline cause, is reported as the
        additional reboot info.
      * Else if there is a cmdline cause, the software cause is the reboot
        cause when it is known; otherwise the cmdline cause is used.
      * Else the software cause is the reboot cause.

    Returns:
        tuple: (previous_reboot_cause, additional_reboot_info), where
        additional_reboot_info is "N/A" when nothing extra was found.
    """
    # Stored as the "comment" in REBOOT_CAUSE_HISTORY_FILE; stays "N/A" unless
    # a hardware reboot was preceded by a software/cmdline-indicated reboot.
    additional_reboot_info = "N/A"

    # 1. Check if the previous reboot was warm/fast reboot by testing whether
    #    there is "fast|fastfast|warm" in /proc/cmdline
    proc_cmdline_reboot_cause = find_proc_cmdline_reboot_cause()

    # 2. Check if the previous reboot was caused by hardware
    hardware_reboot_cause = find_hardware_reboot_cause()

    # 3. If there is a REBOOT_CAUSE_FILE, it will contain any software-related
    #    reboot info.
    software_reboot_cause = find_software_reboot_cause()

    # Strings are compared by value (== / !=), not by identity: an "Unknown"
    # string that is equal to the constant but held in a different object
    # must still be recognised as unknown.
    software_cause_known = software_reboot_cause != REBOOT_CAUSE_UNKNOWN

    if REBOOT_CAUSE_NON_HARDWARE not in hardware_reboot_cause:
        previous_reboot_cause = hardware_reboot_cause
        # Check if any software reboot was issued before this hardware reboot happened
        if software_cause_known:
            additional_reboot_info = software_reboot_cause
        elif proc_cmdline_reboot_cause is not None:
            additional_reboot_info = proc_cmdline_reboot_cause
    elif proc_cmdline_reboot_cause is not None:
        if software_cause_known:
            # Get the reboot cause from REBOOT_CAUSE_FILE
            previous_reboot_cause = software_reboot_cause
        else:
            previous_reboot_cause = proc_cmdline_reboot_cause
    else:
        previous_reboot_cause = software_reboot_cause

    return previous_reboot_cause, additional_reboot_info
209+
161210

162211
def main():
163212
# Configure logger to log all messages INFO level and higher
@@ -177,22 +226,7 @@ def main():
177226
if os.path.exists(PREVIOUS_REBOOT_CAUSE_FILE):
178227
os.remove(PREVIOUS_REBOOT_CAUSE_FILE)
179228

180-
# This variable is kept for future-use purpose. When proc_cmd_line/vendor/software provides
181-
# any additional_reboot_info it will be stored as a "comment" in REBOOT_CAUSE_HISTORY_FILE
182-
additional_reboot_info = "N/A"
183-
184-
# Check if the previous reboot was warm/fast reboot by testing whether there is "fast|fastfast|warm" in /proc/cmdline
185-
proc_cmdline_reboot_cause = find_proc_cmdline_reboot_cause()
186-
187-
# If /proc/cmdline does not indicate reboot cause, check if the previous reboot was caused by hardware
188-
if proc_cmdline_reboot_cause is None:
189-
previous_reboot_cause = find_hardware_reboot_cause()
190-
if previous_reboot_cause.startswith(REBOOT_CAUSE_NON_HARDWARE):
191-
# If the reboot cause is non-hardware, get the reboot cause from REBOOT_CAUSE_FILE
192-
previous_reboot_cause = find_software_reboot_cause()
193-
else:
194-
# Get the reboot cause from REBOOT_CAUSE_FILE
195-
previous_reboot_cause = find_software_reboot_cause()
229+
previous_reboot_cause, additional_reboot_info = determine_reboot_cause()
196230

197231
# Current time
198232
reboot_cause_gen_time = str(datetime.datetime.now().strftime('%Y_%m_%d_%H_%M_%S'))

tests/determine-reboot-cause_test.py

+61-2
Original file line numberDiff line numberDiff line change
@@ -54,11 +54,16 @@
5454
GEN_TIME_KERNEL_PANIC = "2021_3_28_13_48_49"
5555

5656

57+
REBOOT_CAUSE_UNKNOWN = "Unknown"
58+
REBOOT_CAUSE_NON_HARDWARE = "Non-Hardware"
59+
EXPECTED_NON_HARDWARE_REBOOT_CAUSE = {REBOOT_CAUSE_NON_HARDWARE, "N/A"}
60+
REBOOT_CAUSE_HARDWARE_OTHER = "Hardware - Other"
61+
EXPECTED_HARDWARE_REBOOT_CAUSE = {REBOOT_CAUSE_HARDWARE_OTHER, ""}
62+
5763
EXPECTED_PARSE_WARMFAST_REBOOT_FROM_PROC_CMDLINE = "warm-reboot"
5864
EXPECTED_FIND_SOFTWARE_REBOOT_CAUSE_USER = "User issued 'warm-reboot' command [User: admin, Time: Mon Nov 2 22:37:45 UTC 2020]"
5965
EXPECTED_FIND_FIRSTBOOT_VERSION = " (First boot of SONiC version 20191130.52)"
6066
EXPECTED_FIND_SOFTWARE_REBOOT_CAUSE_FIRSTBOOT = "Unknown (First boot of SONiC version 20191130.52)"
61-
EXPECTED_HARDWARE_REBOOT_CAUSE = {"warm-reboot", ""}
6267

6368
EXPECTED_WATCHDOG_REBOOT_CAUSE_DICT = {'comment': '', 'gen_time': '2020_10_22_03_15_08', 'cause': 'Watchdog', 'user': 'N/A', 'time': 'N/A'}
6469
EXPECTED_USER_REBOOT_CAUSE_DICT = {'comment': '', 'gen_time': '2020_10_22_03_14_07', 'cause': 'reboot', 'user': 'admin', 'time': 'Thu Oct 22 03:11:08 UTC 2020'}
@@ -104,7 +109,12 @@ def test_find_proc_cmdline_reboot_cause(self):
104109
def test_find_hardware_reboot_cause(self):
105110
with mock.patch("determine_reboot_cause.get_reboot_cause_from_platform", return_value=("Powerloss", None)):
106111
result = determine_reboot_cause.find_hardware_reboot_cause()
107-
assert result == "Powerloss (None)"
112+
assert result == "Powerloss"
113+
114+
def test_find_hardware_reboot_cause_with_minor(self):
115+
with mock.patch("determine_reboot_cause.get_reboot_cause_from_platform", return_value=("Powerloss", "under-voltage")):
116+
result = determine_reboot_cause.find_hardware_reboot_cause()
117+
assert result == "Powerloss (under-voltage)"
108118

109119
def test_get_reboot_cause_dict_watchdog(self):
110120
reboot_cause_dict = determine_reboot_cause.get_reboot_cause_dict(REBOOT_CAUSE_WATCHDOG, "", GEN_TIME_WATCHDOG)
@@ -117,3 +127,52 @@ def test_get_reboot_cause_dict_user(self):
117127
def test_get_reboot_cause_dict_kernel_panic(self):
118128
reboot_cause_dict = determine_reboot_cause.get_reboot_cause_dict(REBOOT_CAUSE_KERNEL_PANIC, "", GEN_TIME_KERNEL_PANIC)
119129
assert reboot_cause_dict == EXPECTED_KERNEL_PANIC_REBOOT_CAUSE_DICT
130+
131+
def test_determine_reboot_cause_hardware(self):
    """Hardware cause only: it is the reboot cause and the extra info stays "N/A"."""
    with mock.patch("determine_reboot_cause.find_proc_cmdline_reboot_cause", return_value=None), \
            mock.patch("determine_reboot_cause.find_software_reboot_cause", return_value=REBOOT_CAUSE_UNKNOWN), \
            mock.patch("determine_reboot_cause.find_hardware_reboot_cause", return_value=EXPECTED_HARDWARE_REBOOT_CAUSE):
        cause, extra_info = determine_reboot_cause.determine_reboot_cause()

    assert cause == EXPECTED_HARDWARE_REBOOT_CAUSE
    assert extra_info == "N/A"
138+
139+
def test_determine_reboot_cause_software(self):
    """Non-hardware, no cmdline cause: the software cause is the reboot cause."""
    with mock.patch("determine_reboot_cause.find_proc_cmdline_reboot_cause", return_value=None), \
            mock.patch("determine_reboot_cause.find_software_reboot_cause", return_value=EXPECTED_FIND_SOFTWARE_REBOOT_CAUSE_USER), \
            mock.patch("determine_reboot_cause.find_hardware_reboot_cause", return_value=EXPECTED_NON_HARDWARE_REBOOT_CAUSE):
        cause, extra_info = determine_reboot_cause.determine_reboot_cause()

    assert cause == EXPECTED_FIND_SOFTWARE_REBOOT_CAUSE_USER
    assert extra_info == "N/A"
146+
147+
def test_determine_reboot_cause_cmdline_software(self):
    """Non-hardware with both cmdline and software causes: the software cause wins."""
    with mock.patch("determine_reboot_cause.find_proc_cmdline_reboot_cause", return_value=EXPECTED_PARSE_WARMFAST_REBOOT_FROM_PROC_CMDLINE), \
            mock.patch("determine_reboot_cause.find_software_reboot_cause", return_value=EXPECTED_FIND_SOFTWARE_REBOOT_CAUSE_USER), \
            mock.patch("determine_reboot_cause.find_hardware_reboot_cause", return_value=EXPECTED_NON_HARDWARE_REBOOT_CAUSE):
        cause, extra_info = determine_reboot_cause.determine_reboot_cause()

    assert cause == EXPECTED_FIND_SOFTWARE_REBOOT_CAUSE_USER
    assert extra_info == "N/A"
154+
155+
def test_determine_reboot_cause_cmdline_no_software(self):
    """Non-hardware, unknown software cause: the cmdline cause is the reboot cause."""
    with mock.patch("determine_reboot_cause.find_proc_cmdline_reboot_cause", return_value=EXPECTED_PARSE_WARMFAST_REBOOT_FROM_PROC_CMDLINE), \
            mock.patch("determine_reboot_cause.find_software_reboot_cause", return_value=REBOOT_CAUSE_UNKNOWN), \
            mock.patch("determine_reboot_cause.find_hardware_reboot_cause", return_value=EXPECTED_NON_HARDWARE_REBOOT_CAUSE):
        cause, extra_info = determine_reboot_cause.determine_reboot_cause()

    assert cause == EXPECTED_PARSE_WARMFAST_REBOOT_FROM_PROC_CMDLINE
    assert extra_info == "N/A"
162+
163+
def test_determine_reboot_cause_cmdline_hardware(self):
    """Hardware cause with a cmdline cause and unknown software cause: cmdline becomes the extra info."""
    with mock.patch("determine_reboot_cause.find_proc_cmdline_reboot_cause", return_value=EXPECTED_PARSE_WARMFAST_REBOOT_FROM_PROC_CMDLINE), \
            mock.patch("determine_reboot_cause.find_software_reboot_cause", return_value=REBOOT_CAUSE_UNKNOWN), \
            mock.patch("determine_reboot_cause.find_hardware_reboot_cause", return_value=EXPECTED_HARDWARE_REBOOT_CAUSE):
        cause, extra_info = determine_reboot_cause.determine_reboot_cause()

    assert cause == EXPECTED_HARDWARE_REBOOT_CAUSE
    assert extra_info == EXPECTED_PARSE_WARMFAST_REBOOT_FROM_PROC_CMDLINE
170+
171+
def test_determine_reboot_cause_software_hardware(self):
    """Hardware cause with a known software cause: the software cause becomes the extra info."""
    with mock.patch("determine_reboot_cause.find_proc_cmdline_reboot_cause", return_value=EXPECTED_PARSE_WARMFAST_REBOOT_FROM_PROC_CMDLINE), \
            mock.patch("determine_reboot_cause.find_software_reboot_cause", return_value=EXPECTED_FIND_SOFTWARE_REBOOT_CAUSE_USER), \
            mock.patch("determine_reboot_cause.find_hardware_reboot_cause", return_value=EXPECTED_HARDWARE_REBOOT_CAUSE):
        cause, extra_info = determine_reboot_cause.determine_reboot_cause()

    assert cause == EXPECTED_HARDWARE_REBOOT_CAUSE
    assert extra_info == EXPECTED_FIND_SOFTWARE_REBOOT_CAUSE_USER
178+

0 commit comments

Comments
 (0)