Skip to content

Commit a56ecd7

Browse files
[DellEMC] S6100 - iTCO watchdog support and reboot cause determination changes (#9149)
Why I did it: To support the iTCO watchdog through the platform watchdog APIs. How I did it: Implemented a new watchdog class, WatchdogTCO, for interfacing with the iTCO watchdog, and updated the reboot cause determination logic. How to verify it: Verified that the watchdog APIs' return values are as expected. Logs: UT_logs.txt
1 parent 3f7e77e commit a56ecd7

File tree

4 files changed

+186
-7
lines changed

4 files changed

+186
-7
lines changed
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
CONSOLE_PORT=0x2f8
22
CONSOLE_DEV=1
3-
ONIE_PLATFORM_EXTRA_CMDLINE_LINUX="module_blacklist=gpio_ich nos-config-part=/dev/sda12"
3+
ONIE_PLATFORM_EXTRA_CMDLINE_LINUX="module_blacklist=gpio_ich,wdat_wdt acpi_no_watchdog=1 nos-config-part=/dev/sda12"

platform/broadcom/sonic-platform-modules-dell/s6100/scripts/track_reboot_reason.sh

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ MAILBOX_POWERON_REASON=/sys/devices/platform/SMF.512/hwmon/*/mb_poweron_reason
1515
NVRAM_DEVICE_FILE=/dev/nvram
1616
RESET_REASON_FILE=/host/reboot-cause/platform/reset_reason
1717
SMF_DIR=/sys/devices/platform/SMF.512/hwmon/
18+
TCO_RESET_NVRAM_OFFSET=0x59
1819

1920
while [[ ! -d $SMF_DIR ]]
2021
do
@@ -27,6 +28,7 @@ do
2728
done
2829

2930
SMF_RESET=$(cat $SMF_RESET_REASON)
31+
TCO_WD_RESET=0
3032

3133
if [[ -d /host/reboot-cause/platform ]]; then
3234
reboot_dir_found=true
@@ -80,6 +82,18 @@ _get_smf_reset_register(){
8082
echo "Fourth reset - $fourth_reset" >> $RESET_REASON_FILE
8183
fi
8284
logger -p user.info -t DELL_S6100_REBOOT_CAUSE "RST value in NVRAM: $first_reset, $second_reset, $third_reset, $fourth_reset"
85+
86+
if [[ $BIOS_VERSION_MINOR -gt 8 ]]; then
87+
# Retrieve TCO reset status
88+
tco_nvram=$((16#$(nvram_rd_wr.py --get --offset $TCO_RESET_NVRAM_OFFSET | cut -d " " -f 2)))
89+
TCO_WD_RESET=$(($tco_nvram & 1))
90+
logger -p user.info -t DELL_S6100_REBOOT_CAUSE "TCO status value in NVRAM: $TCO_WD_RESET"
91+
92+
# Clear TCO reset status in NVRAM
93+
tco_nvram=$(printf "%x" $(($tco_nvram & 0xfe)))
94+
nvram_rd_wr.py --set --val $tco_nvram --offset $TCO_RESET_NVRAM_OFFSET
95+
fi
96+
8397
# Clearing NVRAM values to holding next reset values
8498
nvram_rd_wr.py --set --val 0xee --offset 0x58
8599
nvram_rd_wr.py --set --val 0xee --offset 0x5c
@@ -183,7 +197,9 @@ update_mailbox_register(){
183197
&& [[ $SMF_MSS_VERSION_MAJOR -ge 2 ]] && [[ $SMF_MSS_VERSION_MINOR -ge 7 ]] \
184198
&& [[ $SMF_FPGA_VERSION_MAJOR -ge 1 ]] && [[ $SMF_FPGA_VERSION_MINOR -ge 4 ]]; then
185199

186-
if [[ $reason = "cc" ]]; then
200+
if [[ $TCO_WD_RESET = 1 ]]; then
201+
echo 0xdd > $MAILBOX_POWERON_REASON
202+
elif [[ $reason = "cc" ]]; then
187203
_is_software_reboot
188204
elif [[ $SMF_RESET = "11" ]]; then
189205
echo 0xee > $MAILBOX_POWERON_REASON
@@ -206,6 +222,8 @@ update_mailbox_register(){
206222
echo 0xee > $MAILBOX_POWERON_REASON
207223
elif [[ $is_wd_reboot = 1 ]] && [[ $reason != "cc" ]]; then
208224
echo 0xdd > $MAILBOX_POWERON_REASON
225+
elif [[ $TCO_WD_RESET = 1 ]]; then
226+
echo 0xdd > $MAILBOX_POWERON_REASON
209227
elif [[ $reason = "cc" ]]; then
210228
_is_software_reboot
211229
else

platform/broadcom/sonic-platform-modules-dell/s6100/sonic_platform/chassis.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
from sonic_platform.module import Module
1818
from sonic_platform.thermal import Thermal
1919
from sonic_platform.component import Component
20-
from sonic_platform.watchdog import Watchdog
20+
from sonic_platform.watchdog import Watchdog, WatchdogTCO
2121
from sonic_platform.eeprom import Eeprom
2222
import time
2323
except ImportError as e:
@@ -93,7 +93,13 @@ def __init__(self):
9393
component = Component(i)
9494
self._component_list.append(component)
9595

96-
self._watchdog = Watchdog()
96+
bios_ver = self.get_component(0).get_firmware_version()
97+
bios_minor_ver = bios_ver.split("-")[-1]
98+
if bios_minor_ver.isdigit() and (int(bios_minor_ver) >= 9):
99+
self._watchdog = WatchdogTCO()
100+
else:
101+
self._watchdog = Watchdog()
102+
97103
self._transceiver_presence = self._get_transceiver_presence()
98104

99105
def _get_reboot_reason_smf_register(self):

platform/broadcom/sonic-platform-modules-dell/s6100/sonic_platform/watchdog.py

Lines changed: 158 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
#!/usr/bin/env python
2-
31
########################################################################
42
#
53
# DELLEMC S6100
@@ -10,20 +8,38 @@
108
########################################################################
119

1210
try:
11+
import array
12+
import ctypes
13+
import fcntl
14+
import glob
1315
import os
1416
import struct
15-
import ctypes
1617
from sonic_platform_base.watchdog_base import WatchdogBase
1718
except ImportError as e:
1819
raise ImportError(str(e) + "- required module not found")
1920

21+
# ioctl constants
#
# Building blocks for the request codes below. Each request code is the
# bitwise OR of a transfer direction, the argument size (shifted into
# bits 16 and up), the watchdog "type" character (shifted into bits 8-15)
# and the command number in the low byte.
IOC_WRITE = 1 << 30
IOC_READ = 1 << 31
IOC_SIZE_INT = 4 << 16  # argument is a 4-byte int

WATCHDOG_IOCTL_BASE = ord('W')

# Watchdog requests: command numbers 4, 5 and 6 respectively
WDIOC_SETOPTIONS = (WATCHDOG_IOCTL_BASE << 8) | IOC_SIZE_INT | IOC_READ | 4
WDIOC_KEEPALIVE = (WATCHDOG_IOCTL_BASE << 8) | IOC_SIZE_INT | IOC_READ | 5
WDIOC_SETTIMEOUT = (WATCHDOG_IOCTL_BASE << 8) | IOC_SIZE_INT | IOC_READ | IOC_WRITE | 6

# Option flags carried in the WDIOC_SETOPTIONS argument
WDIOS_DISABLECARD = 1 << 0
WDIOS_ENABLECARD = 1 << 1
34+
2035

2136
class _timespec(ctypes.Structure):
    """
    ctypes structure holding a seconds / nanoseconds pair, both stored
    as C longs.
    """
    # Field order matters: tv_sec first, then tv_nsec
    _fields_ = [(name, ctypes.c_long) for name in ('tv_sec', 'tv_nsec')]
2641

42+
2743
class Watchdog(WatchdogBase):
2844
"""
2945
Abstract base class for interfacing with a hardware watchdog module
@@ -226,3 +242,142 @@ def get_remaining_time(self):
226242

227243
return 0
228244

245+
246+
class WatchdogTCO(WatchdogBase):
    """
    Watchdog class for interfacing with iTCO watchdog

    The watchdog character device is driven through ioctl requests, while
    its state, timeout and time left are read back from the matching
    /sys/class/watchdog/<device>/ attributes.
    """

    IDENTITY = "iTCO_wdt"

    def __init__(self):
        """
        Locate the watchdog device whose sysfs identity equals IDENTITY.

        Raises:
            RuntimeError: if no /dev/watchdog* device reports IDENTITY
        """
        # File descriptor of the device node; opened lazily by
        # _open_wd_dev() the first time an ioctl is issued.
        self.dev = None
        self.dev_name = None
        wd_sysfs_path = "/sys/class/watchdog"

        for dev_file in glob.glob("/dev/watchdog*"):
            dev = os.path.basename(dev_file)
            dev_identity = self._read_file("{}/{}/identity".format(wd_sysfs_path, dev))
            if dev_identity == self.IDENTITY:
                self.dev_name = dev
                break

        if self.dev_name is None:
            raise RuntimeError("{} is not initialized".format(self.IDENTITY))

        self.state_file = "{}/{}/state".format(wd_sysfs_path, self.dev_name)
        self.timeout_file = "{}/{}/timeout".format(wd_sysfs_path, self.dev_name)
        self.timeleft_file = "{}/{}/timeleft".format(wd_sysfs_path, self.dev_name)

    def __del__(self):
        """
        Close the watchdog device file descriptor, if it was opened.
        """
        # getattr: __del__ may run on an instance whose __init__ did not
        # get as far as assigning self.dev.
        dev = getattr(self, "dev", None)
        if dev is not None:
            self.dev = None
            os.close(dev)

    def _ioctl(self, request, arg=0, mutate_flag=True):
        """
        Perform ioctl on watchdog device

        Args:
            request: ioctl request code (one of the WDIOC_* constants)
            arg: integer or buffer passed as the ioctl argument
            mutate_flag: forwarded to fcntl.ioctl; when True a writable
                buffer passed as arg is updated in place

        Raises:
            OSError / IOError: if the device cannot be opened or the
                ioctl fails
        """
        self._open_wd_dev()
        fcntl.ioctl(self.dev, request, arg, mutate_flag)

    def _open_wd_dev(self):
        """
        Open watchdog device file

        The descriptor is opened once and cached in self.dev.
        """
        if self.dev is None:
            wd_dev = "/dev/{}".format(self.dev_name)
            self.dev = os.open(wd_dev, os.O_RDWR)

    @staticmethod
    def _read_file(file_path):
        """
        Read a file

        Args:
            file_path: path of the file to read

        Returns:
            The file contents with surrounding whitespace stripped, or
            the integer -1 if the file could not be opened or read.
        """
        try:
            with open(file_path, "r") as fd:
                read_str = fd.read()
        # IOError is an alias of OSError on Python 3; it is listed so the
        # failure path also works on Python 2, where open() raises IOError.
        except (IOError, OSError):
            return -1

        return read_str.strip()

    def arm(self, seconds):
        """
        Arm the hardware watchdog with a timeout of <seconds> seconds.
        If the watchdog is currently armed, calling this function will
        simply reset the timer to the provided value. If the underlying
        hardware does not support the value provided in <seconds>, this
        method should arm the watchdog with the *next greater*
        available value.

        Returns:
            An integer specifying the *actual* number of seconds the
            watchdog was armed with. On failure returns -1.
        """
        # Accepted range is 0..0x3ff; requests below 4 are raised to 4.
        # NOTE(review): presumably these are the limits of the iTCO
        # timer - confirm against the driver.
        if seconds < 0 or seconds > 0x3ff:
            return -1
        if seconds < 4:
            seconds = 4

        try:
            timeout = int(self._read_file(self.timeout_file))
            if timeout != seconds:
                # The buffer is mutated in place, so buf[0] holds the
                # timeout reported back by the driver after the call.
                buf = array.array('I', [seconds])
                self._ioctl(WDIOC_SETTIMEOUT, buf)
                timeout = int(buf[0])

            if self.is_armed():
                self._ioctl(WDIOC_KEEPALIVE)
            else:
                # The request code encodes an int-sized argument, so the
                # option flags are passed as a C int, not a short.
                buf = array.array('i', [WDIOS_ENABLECARD])
                self._ioctl(WDIOC_SETOPTIONS, buf, False)
        # ValueError: the sysfs timeout attribute was not a number
        except (IOError, OSError, ValueError):
            return -1

        return timeout

    def disarm(self):
        """
        Disarm the hardware watchdog

        Returns:
            A boolean, True if watchdog is disarmed successfully, False
            if not
        """
        disarmed = True
        if self.is_armed():
            try:
                # int-sized argument, matching the size encoded in
                # WDIOC_SETOPTIONS
                buf = array.array('i', [WDIOS_DISABLECARD])
                self._ioctl(WDIOC_SETOPTIONS, buf, False)
            except (IOError, OSError):
                disarmed = False

        return disarmed

    def is_armed(self):
        """
        Retrieves the armed state of the hardware watchdog.

        Returns:
            A boolean, True if watchdog is armed, False if not
        """
        # The sysfs "state" attribute reads "active" while the watchdog
        # is running; anything else (including a read failure) counts
        # as not armed.
        state = self._read_file(self.state_file)
        return state == "active"

    def get_remaining_time(self):
        """
        If the watchdog is armed, retrieve the number of seconds
        remaining on the watchdog timer

        Returns:
            An integer specifying the number of seconds remaining on
            the watchdog timer. If the watchdog is not armed, or the
            remaining time cannot be read, returns -1.
        """
        timeleft = -1
        if self.is_armed():
            try:
                timeleft = int(self._read_file(self.timeleft_file))
            except ValueError:
                # Attribute content was not a number
                timeleft = -1

        return timeleft

0 commit comments

Comments
 (0)