Skip to content

[DellEMC] S6100 - iTCO watchdog support and reboot cause determination changes #9149

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion device/dell/x86_64-dell_s6100_c2538-r0/installer.conf
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
CONSOLE_PORT=0x2f8
CONSOLE_DEV=1
ONIE_PLATFORM_EXTRA_CMDLINE_LINUX="module_blacklist=gpio_ich nos-config-part=/dev/sda12"
ONIE_PLATFORM_EXTRA_CMDLINE_LINUX="module_blacklist=gpio_ich,wdat_wdt acpi_no_watchdog=1 nos-config-part=/dev/sda12"
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ MAILBOX_POWERON_REASON=/sys/devices/platform/SMF.512/hwmon/*/mb_poweron_reason
NVRAM_DEVICE_FILE=/dev/nvram
RESET_REASON_FILE=/host/reboot-cause/platform/reset_reason
SMF_DIR=/sys/devices/platform/SMF.512/hwmon/
TCO_RESET_NVRAM_OFFSET=0x59

while [[ ! -d $SMF_DIR ]]
do
Expand All @@ -27,6 +28,7 @@ do
done

SMF_RESET=$(cat $SMF_RESET_REASON)
TCO_WD_RESET=0

if [[ -d /host/reboot-cause/platform ]]; then
reboot_dir_found=true
Expand Down Expand Up @@ -80,6 +82,18 @@ _get_smf_reset_register(){
echo "Fourth reset - $fourth_reset" >> $RESET_REASON_FILE
fi
logger -p user.info -t DELL_S6100_REBOOT_CAUSE "RST value in NVRAM: $first_reset, $second_reset, $third_reset, $fourth_reset"

if [[ $BIOS_VERSION_MINOR -gt 8 ]]; then
# Retrieve TCO reset status
tco_nvram=$((16#$(nvram_rd_wr.py --get --offset $TCO_RESET_NVRAM_OFFSET | cut -d " " -f 2)))
TCO_WD_RESET=$(($tco_nvram & 1))
logger -p user.info -t DELL_S6100_REBOOT_CAUSE "TCO status value in NVRAM: $TCO_WD_RESET"

# Clear TCO reset status in NVRAM
tco_nvram=$(printf "%x" $(($tco_nvram & 0xfe)))
nvram_rd_wr.py --set --val $tco_nvram --offset $TCO_RESET_NVRAM_OFFSET
fi

# Clearing NVRAM values to holding next reset values
nvram_rd_wr.py --set --val 0xee --offset 0x58
nvram_rd_wr.py --set --val 0xee --offset 0x5c
Expand Down Expand Up @@ -183,7 +197,9 @@ update_mailbox_register(){
&& [[ $SMF_MSS_VERSION_MAJOR -ge 2 ]] && [[ $SMF_MSS_VERSION_MINOR -ge 7 ]] \
&& [[ $SMF_FPGA_VERSION_MAJOR -ge 1 ]] && [[ $SMF_FPGA_VERSION_MINOR -ge 4 ]]; then

if [[ $reason = "cc" ]]; then
if [[ $TCO_WD_RESET = 1 ]]; then
echo 0xdd > $MAILBOX_POWERON_REASON
elif [[ $reason = "cc" ]]; then
_is_software_reboot
elif [[ $SMF_RESET = "11" ]]; then
echo 0xee > $MAILBOX_POWERON_REASON
Expand All @@ -206,6 +222,8 @@ update_mailbox_register(){
echo 0xee > $MAILBOX_POWERON_REASON
elif [[ $is_wd_reboot = 1 ]] && [[ $reason != "cc" ]]; then
echo 0xdd > $MAILBOX_POWERON_REASON
elif [[ $TCO_WD_RESET = 1 ]]; then
echo 0xdd > $MAILBOX_POWERON_REASON
elif [[ $reason = "cc" ]]; then
_is_software_reboot
else
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from sonic_platform.module import Module
from sonic_platform.thermal import Thermal
from sonic_platform.component import Component
from sonic_platform.watchdog import Watchdog
from sonic_platform.watchdog import Watchdog, WatchdogTCO
from sonic_platform.eeprom import Eeprom
import time
except ImportError as e:
Expand Down Expand Up @@ -93,7 +93,13 @@ def __init__(self):
component = Component(i)
self._component_list.append(component)

self._watchdog = Watchdog()
bios_ver = self.get_component(0).get_firmware_version()
bios_minor_ver = bios_ver.split("-")[-1]
if bios_minor_ver.isdigit() and (int(bios_minor_ver) >= 9):
self._watchdog = WatchdogTCO()
else:
self._watchdog = Watchdog()

self._transceiver_presence = self._get_transceiver_presence()

def _get_reboot_reason_smf_register(self):
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
#!/usr/bin/env python

########################################################################
#
# DELLEMC S6100
Expand All @@ -10,20 +8,38 @@
########################################################################

try:
import array
import ctypes
import fcntl
import glob
import os
import struct
import ctypes
from sonic_platform_base.watchdog_base import WatchdogBase
except ImportError as e:
raise ImportError(str(e) + "- required module not found")

# ioctl constants
IOC_WRITE = 0x40000000
IOC_READ = 0x80000000
IOC_SIZE_INT = 0x00040000

WATCHDOG_IOCTL_BASE = ord('W')

WDIOC_SETOPTIONS = IOC_READ | IOC_SIZE_INT | (WATCHDOG_IOCTL_BASE << 8) | 4
WDIOC_KEEPALIVE = IOC_READ | IOC_SIZE_INT | (WATCHDOG_IOCTL_BASE << 8) | 5
WDIOC_SETTIMEOUT = IOC_READ | IOC_WRITE | IOC_SIZE_INT | (WATCHDOG_IOCTL_BASE << 8) | 6

WDIOS_DISABLECARD = 0x0001
WDIOS_ENABLECARD = 0x0002


class _timespec(ctypes.Structure):
_fields_ = [
('tv_sec', ctypes.c_long),
('tv_nsec', ctypes.c_long)
]


class Watchdog(WatchdogBase):
"""
Abstract base class for interfacing with a hardware watchdog module
Expand Down Expand Up @@ -226,3 +242,142 @@ def get_remaining_time(self):

return 0


class WatchdogTCO(WatchdogBase):
"""
Watchdog class for interfacing with iTCO watchdog
"""

IDENTITY = "iTCO_wdt"

def __init__(self):

self.dev = None
self.dev_name = None
wd_sysfs_path = "/sys/class/watchdog"

for dev_file in glob.glob("/dev/watchdog*"):
dev = os.path.basename(dev_file)
dev_identity = self._read_file("{}/{}/identity".format(wd_sysfs_path, dev))
if dev_identity == self.IDENTITY:
self.dev_name = dev
break

if self.dev_name is None:
raise RuntimeError("{} is not initialized".format(self.IDENTITY))

self.state_file = "{}/{}/state".format(wd_sysfs_path, self.dev_name)
self.timeout_file = "{}/{}/timeout".format(wd_sysfs_path, self.dev_name)
self.timeleft_file = "{}/{}/timeleft".format(wd_sysfs_path, self.dev_name)

def __del__(self):
if self.dev is not None:
os.close(self.dev)

def _ioctl(self, request, arg=0, mutate_flag=True):
"""
Perform ioctl on watchdog device
"""
self._open_wd_dev()
fcntl.ioctl(self.dev, request, arg, mutate_flag)

def _open_wd_dev(self):
"""
Open watchdog device file
"""
if self.dev is None:
wd_dev = "/dev/{}".format(self.dev_name)
self.dev = os.open(wd_dev, os.O_RDWR)

@staticmethod
def _read_file(file_path):
"""
Read a file
"""
try:
with open(file_path, "r") as fd:
read_str = fd.read()
except OSError:
return -1

return read_str.strip()

def arm(self, seconds):
"""
Arm the hardware watchdog with a timeout of <seconds> seconds.
If the watchdog is currently armed, calling this function will
simply reset the timer to the provided value. If the underlying
hardware does not support the value provided in <seconds>, this
method should arm the watchdog with the *next greater*
available value.

Returns:
An integer specifying the *actual* number of seconds the
watchdog was armed with. On failure returns -1.
"""
if seconds < 0 or seconds > 0x3ff:
return -1
if seconds < 4:
seconds = 4

try:
timeout = int(self._read_file(self.timeout_file))
if timeout != seconds:
buf = array.array('I', [seconds])
self._ioctl(WDIOC_SETTIMEOUT, buf)
timeout = int(buf[0])

if self.is_armed():
self._ioctl(WDIOC_KEEPALIVE)
else:
buf = array.array('h', [WDIOS_ENABLECARD])
self._ioctl(WDIOC_SETOPTIONS, buf, False)
except OSError:
return -1
else:
return timeout

def disarm(self):
"""
Disarm the hardware watchdog

Returns:
A boolean, True if watchdog is disarmed successfully, False
if not
"""
disarmed = True
if self.is_armed():
try:
buf = array.array('h', [WDIOS_DISABLECARD])
self._ioctl(WDIOC_SETOPTIONS, buf, False)
except OSError:
disarmed = False

return disarmed

def is_armed(self):
"""
Retrieves the armed state of the hardware watchdog.

Returns:
A boolean, True if watchdog is armed, False if not
"""
state = self._read_file(self.state_file)
return state == "active"

def get_remaining_time(self):
"""
If the watchdog is armed, retrieve the number of seconds
remaining on the watchdog timer

Returns:
An integer specifying the number of seconds remaining on
their watchdog timer. If the watchdog is not armed, returns
-1.

"""
timeleft = -1
if self.is_armed():
timeleft = int(self._read_file(self.timeleft_file))

return timeleft