Skip to content

Commit a4dd0aa

Browse files
[mellanox] add hardware watchdog script (#4274)
admin@sonic:~$ sudo hw-management-wd.sh Usage: hw-management-wd.sh start [timeout] | stop | tleft | check_reset | help start - start watchdog timeout is optional. Default value will be used in case if it's omitted timeout provided in seconds stop - stop watchdog tleft - check watchdog timeout left check_reset - check if previous reset was caused by watchdog Prints only in case of watchdog reset help -this help Signed-off-by: Stepan Blyschak <[email protected]>
1 parent c8d8f1c commit a4dd0aa

File tree

5 files changed

+198
-1
lines changed

5 files changed

+198
-1
lines changed

files/build_templates/sonic_debian_extension.j2

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -364,6 +364,7 @@ for MLNX_CPLD_ARCHIVE in $MLNX_CPLD_ARCHIVES; do
364364
done
365365
sudo cp target/files/$ISSU_VERSION_FILE $FILESYSTEM_ROOT/etc/mlnx/issu-version
366366
sudo cp target/files/$MLNX_FFB_SCRIPT $FILESYSTEM_ROOT/usr/bin/mlnx-ffb.sh
367+
sudo cp target/files/$HW_MANAGEMENT_WD_SCRIPT $FILESYSTEM_ROOT/usr/bin/$HW_MANAGEMENT_WD_SCRIPT
367368
j2 platform/mellanox/mlnx-fw-upgrade.j2 | sudo tee $FILESYSTEM_ROOT/usr/bin/mlnx-fw-upgrade.sh
368369
sudo chmod 755 $FILESYSTEM_ROOT/usr/bin/mlnx-fw-upgrade.sh
369370

platform/mellanox/hw-management-wd.mk

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
# Mellanox script for enabling/disabling hardware watchdog

# File name of the watchdog script. The image build template copies
# target/files/$HW_MANAGEMENT_WD_SCRIPT to /usr/bin/ under this same name.
HW_MANAGEMENT_WD_SCRIPT = hw-management-wd.sh

# Source directory of the script inside the repository.
$(HW_MANAGEMENT_WD_SCRIPT)_PATH = platform/mellanox/

# Register the script with the build's copy-files list.
SONIC_COPY_FILES += $(HW_MANAGEMENT_WD_SCRIPT)

# Exported so the shell build template can reference $HW_MANAGEMENT_WD_SCRIPT.
export HW_MANAGEMENT_WD_SCRIPT
8+

platform/mellanox/hw-management-wd.sh

Lines changed: 182 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,182 @@
1+
#!/bin/sh
2+
########################################################################
3+
# Copyright (c) 2020 Mellanox Technologies. All rights reserved.
4+
#
5+
# Redistribution and use in source and binary forms, with or without
6+
# modification, are permitted provided that the following conditions are met:
7+
#
8+
# 1. Redistributions of source code must retain the above copyright
9+
# notice, this list of conditions and the following disclaimer.
10+
# 2. Redistributions in binary form must reproduce the above copyright
11+
# notice, this list of conditions and the following disclaimer in the
12+
# documentation and/or other materials provided with the distribution.
13+
# 3. Neither the names of the copyright holders nor the names of its
14+
# contributors may be used to endorse or promote products derived from
15+
# this software without specific prior written permission.
16+
#
17+
# Alternatively, this software may be distributed under the terms of the
18+
# GNU General Public License ("GPL") version 2 as published by the Free
19+
# Software Foundation.
20+
#
21+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22+
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23+
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24+
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
25+
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26+
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27+
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28+
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29+
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30+
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31+
# POSSIBILITY OF SUCH DAMAGE.
32+
#
33+
34+
# Description: hw-management watchdog actions script.
35+
# It uses direct access to CPLD watchdog mechanism.
36+
# 1. Start watchdog with the provided timeout or the default timeout.
37+
# 2. Stop watchdog.
38+
# 3. Check if previous reset was caused by watchdog.
39+
# 4. Check watchdog time left.
40+
41+
# --- CPLD register map -------------------------------------------------------
# Base I/O address; every register offset below is added to it before an
# iorw access.
CPLD_LPC_BASE=0x2500
# Capability register: bit CPLD_LPC_WD_CPBLT_BIT selects the watchdog type.
CPLD_LPC_CPBLT_REG=0xf9
# Reset-cause register: bit CPLD_LPC_WD_RESET_CAUSE_BIT flags a watchdog reset.
CPLD_LPC_RESET_CAUSE_REG=0x1d
# Timer and action registers used for the type 3 watchdog.
CPLD_LPC_WD2_TMR_REG=0xcd
CPLD_LPC_WD2_ACT_REG=0xcf
CPLD_LPC_WD_CPBLT_BIT=6
CPLD_LPC_WD_RESET_CAUSE_BIT=6
# Value written to the action register when the watchdog is started.
CPLD_LPC_WD_RESET=1
# Type 3 watchdog: default and maximum timeout, in seconds.
WD3_DFLT_TO=600
WD_TYPE3_MAX_TO=65535

# --- Runtime state, filled in by check_watchdog_type -------------------------
wd_max_to=
wd_act_reg=
wd_tmr_reg=
wd_tleft_reg=
wd_tmr_reg_len=
wd_to=

# --- Command line ------------------------------------------------------------
# First argument is the requested action; the argument count tells
# check_watchdog_timeout whether an explicit timeout was supplied.
action=$1
param_num=$#
61+
62+
# Print the command synopsis followed by a short description of each action.
usage()
{
    # One printf call; each argument becomes one output line.
    printf '%s\n' \
        "Usage: $(basename "$0") start [timeout] | stop | tleft | check_reset | help" \
        "start - start watchdog" \
        " timeout is optional. Default value will be used in case if it's omitted" \
        " timeout provided in seconds" \
        "stop - stop watchdog" \
        "tleft - check watchdog timeout left" \
        "check_reset - check if previous reset was caused by watchdog" \
        " Prints only in case of watchdog reset" \
        "help -this help"
}
74+
75+
# Detect the watchdog type and load the matching register layout.
#
# Reads one byte from the CPLD capability register and tests bit
# CPLD_LPC_WD_CPBLT_BIT:
#   0 - type 3: sets wd_type, wd_to (default timeout), wd_max_to, wd_act_reg,
#       wd_tmr_reg, wd_tleft_reg and wd_tmr_reg_len.
#   1 - type 1 or 2 (chosen by DMI board name); neither is supported, so a
#       message is printed and the script exits with status 1.
# Also exits with status 1 when the capability register cannot be read.
check_watchdog_type()
{
    reg=$((CPLD_LPC_BASE + CPLD_LPC_CPBLT_REG))
    wd_cpblt=$(iorw -r -b "$reg" -l 1 | awk '{print $5}')

    # An empty string evaluates to 0 in shell arithmetic, which would be
    # mistaken for "type 3" below; treat a failed read as a hard error.
    if [ -z "$wd_cpblt" ]; then
        echo "Error: failed to read watchdog capability register"
        exit 1
    fi

    # Isolate the capability bit.
    wd_cpblt=$(( (wd_cpblt >> CPLD_LPC_WD_CPBLT_BIT) & 1 ))

    if [ "$wd_cpblt" -eq 0 ]; then
        wd_type=3
        wd_to=$WD3_DFLT_TO
        wd_max_to=$WD_TYPE3_MAX_TO
        wd_act_reg=$CPLD_LPC_WD2_ACT_REG
        wd_tmr_reg=$CPLD_LPC_WD2_TMR_REG
        # Time left is read back from the same register the timeout is written to.
        wd_tleft_reg=$CPLD_LPC_WD2_TMR_REG
        wd_tmr_reg_len=2
    else
        board=$(cat /sys/devices/virtual/dmi/id/board_name)
        case $board in
            VMOD0001|VMOD0003)
                wd_type=1
                ;;
            *)
                wd_type=2
                ;;
        esac
        echo "Watchdog type ${wd_type} isn't supported by this script."
        exit 1
    fi
}
104+
105+
# Select and validate the watchdog timeout.
#
# Arguments: the script's own arguments ("$@"); when a second argument is
# present it overrides the default timeout already stored in wd_to.
# Exits with status 1 when the timeout is empty, is not a non-negative decimal
# integer, or exceeds wd_max_to.
check_watchdog_timeout()
{
    if [ "$#" -ge 2 ]; then
        wd_to=$2
    fi

    # Reject anything that is not a plain decimal number. Without this check
    # the numeric comparison below fails with an error, the "if" is skipped,
    # and the bad value is passed on to the hardware write.
    case $wd_to in
        ''|*[!0-9]*)
            echo "Error: Watchdog timeout '${wd_to}' is not a non-negative integer"
            exit 1
            ;;
    esac

    # Negated -le also catches values too large for the shell to compare
    # (the test then fails with an error instead of returning "false").
    if ! [ "$wd_to" -le "$wd_max_to" ] 2>/dev/null; then
        echo "Error: Watchdog timeout ${wd_to} exceeds max timeout ${wd_max_to}"
        exit 1
    fi
}
115+
116+
# Start the watchdog with the timeout held in wd_to.
#
# Writes wd_to (wd_tmr_reg_len bytes) to the timer register, then writes
# CPLD_LPC_WD_RESET to the action register. Exits with status 1 if either
# write fails, instead of reporting success unconditionally.
# NOTE(review): assumes iorw exits non-zero on failure - confirm.
start_watchdog()
{
    reg=$((CPLD_LPC_BASE + wd_tmr_reg))
    if ! iorw -w -b "$reg" -v "$wd_to" -l "$wd_tmr_reg_len"; then
        echo "Error: failed to write watchdog timer register"
        exit 1
    fi

    reg=$((CPLD_LPC_BASE + wd_act_reg))
    val=$CPLD_LPC_WD_RESET
    if ! iorw -w -b "$reg" -v "$val" -l 1; then
        echo "Error: failed to write watchdog action register"
        exit 1
    fi

    echo "Watchdog is started, timeout ${wd_to} sec."
}
125+
126+
# Stop the watchdog.
#
# Writes 0 to the action register first, then clears the timer register one
# byte at a time (wd_tmr_reg and wd_tmr_reg + 1). Exits with status 1 if any
# write fails, instead of reporting success unconditionally.
# NOTE(review): assumes iorw exits non-zero on failure - confirm.
stop_watchdog()
{
    reg=$((CPLD_LPC_BASE + wd_act_reg))
    if ! iorw -w -b "$reg" -v 0 -l 1; then
        echo "Error: failed to write watchdog action register"
        exit 1
    fi

    reg=$((CPLD_LPC_BASE + wd_tmr_reg))
    if ! iorw -w -b "$reg" -v 0 -l 1; then
        echo "Error: failed to write watchdog timer register"
        exit 1
    fi

    # Second byte of the timer register.
    reg=$((reg + 1))
    if ! iorw -w -b "$reg" -v 0 -l 1; then
        echo "Error: failed to write watchdog timer register"
        exit 1
    fi

    echo "Watchdog is stopped"
}
136+
137+
# Print the remaining watchdog time in seconds.
#
# Reads two bytes from the time-left register, joins field 3 and field 2 of
# the iorw output into one hexadecimal number and prints it in decimal.
# Exits with status 1 when the read yields nothing or a non-hexadecimal
# value, instead of printing a misleading "0 sec.".
time_left()
{
    reg=$((CPLD_LPC_BASE + wd_tleft_reg))
    # Field 3 goes in front of field 2 - presumably high byte then low byte;
    # confirm against the iorw output format.
    val=$(iorw -r -b "$reg" -l 2 | awk '{print $3 $2}')

    case $val in
        ''|*[!0-9a-fA-F]*)
            echo "Error: failed to read watchdog time left"
            exit 1
            ;;
    esac

    printf "Watchdog timeleft: %d sec.\n" "0x${val}"
}
144+
145+
# Report whether the previous reset was caused by the watchdog.
#
# Reads one byte from the reset-cause register and prints a message only when
# bit CPLD_LPC_WD_RESET_CAUSE_BIT is set; otherwise prints nothing.
check_reset()
{
    cause_port=$((CPLD_LPC_BASE + CPLD_LPC_RESET_CAUSE_REG))
    cause=$(iorw -r -b "$cause_port" -l 1 | awk '{print $5}')

    # Shift the watchdog flag down to bit 0 and mask everything else off.
    wd_flag=$(( (cause >> CPLD_LPC_WD_RESET_CAUSE_BIT) & 1 ))

    [ "$wd_flag" -eq 1 ] || return 0
    echo "Watchdog was caused reset in previous boot"
}
155+
156+
# Dispatch on the requested action.
#
# The watchdog type is probed only for actions that access the hardware, so
# "help" and the usage message for a missing or unknown action work without
# iorw access and on boards whose watchdog type is unsupported.
case $action in
    start)
        check_watchdog_type
        check_watchdog_timeout "$@"
        start_watchdog
        ;;
    stop)
        check_watchdog_type
        stop_watchdog
        ;;
    tleft)
        check_watchdog_type
        time_left
        ;;
    check_reset)
        # Still gated on a supported watchdog type, as before.
        check_watchdog_type
        check_reset
        ;;
    help)
        usage
        ;;
    *)
        # Missing or unknown action: show usage and signal the error.
        usage
        exit 1
        ;;
esac

exit 0
182+

platform/mellanox/one-image.mk

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,5 +5,10 @@ $(SONIC_ONE_IMAGE)_MACHINE = mellanox
55
$(SONIC_ONE_IMAGE)_IMAGE_TYPE = onie
66
$(SONIC_ONE_IMAGE)_INSTALLS += $(SX_KERNEL) $(KERNEL_MFT) $(MFT_OEM) $(MFT) $(MLNX_HW_MANAGEMENT)
77
$(SONIC_ONE_IMAGE)_DOCKERS += $(SONIC_INSTALL_DOCKER_IMAGES)
8-
$(SONIC_ONE_IMAGE)_FILES += $(MLNX_FW_FILE) $(MLNX_CPLD_ARCHIVES) $(MLNX_FFB_SCRIPT) $(ISSU_VERSION_FILE)
8+
$(SONIC_ONE_IMAGE)_FILES += $(MLNX_FW_FILE) \
9+
$(MLNX_CPLD_ARCHIVES) \
10+
$(MLNX_FFB_SCRIPT) \
11+
$(ISSU_VERSION_FILE) \
12+
$(HW_MANAGEMENT_WD_SCRIPT)
13+
914
SONIC_INSTALLERS += $(SONIC_ONE_IMAGE)

platform/mellanox/rules.mk

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ include $(PLATFORM_PATH)/fw.mk
44
include $(PLATFORM_PATH)/mft.mk
55
include $(PLATFORM_PATH)/mlnx-sai.mk
66
include $(PLATFORM_PATH)/hw-management.mk
7+
include $(PLATFORM_PATH)/hw-management-wd.mk
78
include $(PLATFORM_PATH)/mlnx-platform-api.mk
89
include $(PLATFORM_PATH)/docker-syncd-mlnx.mk
910
include $(PLATFORM_PATH)/docker-syncd-mlnx-rpc.mk

0 commit comments

Comments
 (0)