Skip to content

Commit 5921ba5

Browse files
authored
DellEMC S6100 SSD Monitor Porting Changes in 202012 (#8626)
Why I did it: To monitor the SSD health condition on the DellEMC S6100 platform post upgrade. A daemon is introduced to monitor the SSD once every hour, and the SSD status is also checked at boot time and at the time of a cold reboot. All these changes are supported only for the newer SSD firmware. These changes are ported from the 201911 branch. A platform_reboot_pre_check script is added to prevent a cold reboot based on the SSD status. Depends on sonic-net/sonic-utilities#1788 (DO NOT MERGE UNTIL THE ABOVE PR IS MERGED). How I did it: On branch s6100_ssd_202012. Changes to be committed (use "git restore --staged ..." to unstage): modified: platform/broadcom/sonic-platform-modules-dell/debian/platform-modules-s6100.install; new file: platform/broadcom/sonic-platform-modules-dell/s6100/scripts/iSMART_64; new file: platform/broadcom/sonic-platform-modules-dell/s6100/scripts/platform_reboot_pre_check; modified: platform/broadcom/sonic-platform-modules-dell/s6100/scripts/s6100_platform.sh; new file: platform/broadcom/sonic-platform-modules-dell/s6100/scripts/s6100_ssd_mon.sh; new file: platform/broadcom/sonic-platform-modules-dell/s6100/scripts/s6100_ssd_upgrade_status.sh; new file: platform/broadcom/sonic-platform-modules-dell/s6100/scripts/soft-reboot_plugin; new file: platform/broadcom/sonic-platform-modules-dell/s6100/systemd/s6100-ssd-monitor.service; new file: platform/broadcom/sonic-platform-modules-dell/s6100/systemd/s6100-ssd-monitor.timer; new file: platform/broadcom/sonic-platform-modules-dell/s6100/systemd/s6100-ssd-upgrade-status.service.
1 parent f6f4c7f commit 5921ba5

10 files changed

+226
-0
lines changed

platform/broadcom/sonic-platform-modules-dell/debian/platform-modules-s6100.install

+8
Original file line numberDiff line numberDiff line change
@@ -10,18 +10,26 @@ s6100/scripts/platform_reboot_override usr/share/sonic/device/x86_64-dell_s6100_
1010
s6100/scripts/fast-reboot_plugin usr/share/sonic/device/x86_64-dell_s6100_c2538-r0
1111
s6100/scripts/track_reboot_reason.sh usr/share/sonic/device/x86_64-dell_s6100_c2538-r0
1212
s6100/scripts/warm-reboot_plugin usr/share/sonic/device/x86_64-dell_s6100_c2538-r0
13+
s6100/scripts/soft-reboot_plugin usr/share/sonic/device/x86_64-dell_s6100_c2538-r0
1314
s6100/scripts/ssd-fw-upgrade usr/share/sonic/device/x86_64-dell_s6100_c2538-r0
1415
s6100/scripts/override.conf /etc/systemd/system/systemd-reboot.service.d
1516
s6100/scripts/s6100_serial_getty_monitor etc/monit/conf.d
1617
common/dell_lpc_mon.sh usr/local/bin
18+
s6100/scripts/s6100_ssd_mon.sh usr/local/bin
19+
s6100/scripts/s6100_ssd_upgrade_status.sh usr/local/bin
1720
s6100/scripts/platform_sensors.py usr/local/bin
21+
s6100/scripts/platform_reboot_pre_check usr/share/sonic/device/x86_64-dell_s6100_c2538-r0
1822
s6100/modules/sonic_platform-1.0-py2-none-any.whl usr/share/sonic/device/x86_64-dell_s6100_c2538-r0
1923
s6100/modules/sonic_platform-1.0-py3-none-any.whl usr/share/sonic/device/x86_64-dell_s6100_c2538-r0
2024
s6100/scripts/platform_watchdog_enable.sh usr/local/bin
2125
s6100/scripts/platform_watchdog_disable.sh usr/local/bin
2226
s6100/scripts/sensors usr/bin
27+
s6100/scripts/iSMART_64 usr/local/bin
2328
s6100/systemd/platform-modules-s6100.service etc/systemd/system
2429
s6100/systemd/s6100-lpc-monitor.service etc/systemd/system
30+
s6100/systemd/s6100-ssd-monitor.service etc/systemd/system
31+
s6100/systemd/s6100-ssd-monitor.timer etc/systemd/system
32+
s6100/systemd/s6100-ssd-upgrade-status.service etc/systemd/system
2533
s6100/systemd/s6100-reboot-cause.service etc/systemd/system
2634
s6100/systemd/s6100-i2c-enumerate.service etc/systemd/system
2735
tools/flashrom/flashrom usr/local/bin/
Binary file not shown.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
#!/bin/bash
2+
SSD_FW_UPGRADE="/host/ssd_fw_upgrade"
3+
4+
# Report an SSD condition on stdout and to syslog.
# Arguments: $1 - short description of the SSD state (e.g. "faulty"),
#                 inserted into the first message line.
# Outputs:   two lines on stdout; the same two lines are passed to logger
#            at priority user.crit under the DELL_S6100_SSD_MON tag.
_error_msg() {
    local state=$1
    local -a notice=(
        "The SSD on this unit is ${state}. Do not power-cycle/reboot this unit."
        "soft-/fast-/warm-reboot is allowed."
    )
    local text

    # Print both lines to stdout first ...
    for text in "${notice[@]}"; do
        echo "$text"
    done

    # ... then hand the same lines to syslog.
    for text in "${notice[@]}"; do
        logger -p user.crit -t DELL_S6100_SSD_MON "$text"
    done
}
10+
11+
# Check SSD Status
#
# Inspects the marker files under $SSD_FW_UPGRADE and, when GPIO7_high is
# present, the live GPIO status reported by iSMART.
# Exit status: 0 only when GPIO7_high exists and iSMART reports GPIO 0x01;
#              1 in every other case.
# NOTE(review): presumably the reboot tooling treats a non-zero exit as
# "do not cold-reboot" - confirm against the caller.

# Marker present: the SSD is reported as still running the older firmware.
if [[ -e "$SSD_FW_UPGRADE/GPIO7_pending_upgrade" ]]; then
    _error_msg "running older firmware"
    exit 1
fi

# Either marker means the SSD is reported as faulty.
if [[ -e "$SSD_FW_UPGRADE/GPIO7_low" || -e "$SSD_FW_UPGRADE/GPIO7_error" ]]; then
    _error_msg "faulty"
    exit 1
fi

# GPIO7_high marker present: re-check the live GPIO status before deciding.
if [[ -e "$SSD_FW_UPGRADE/GPIO7_high" ]]; then
    iSMART="/usr/local/bin/iSMART_64"
    iSMART_OPTIONS="-d /dev/sda"

    # iSMART_OPTIONS is left unquoted on purpose so it splits into
    # separate arguments ("-d" and "/dev/sda").
    iSMART_CMD=$($iSMART $iSMART_OPTIONS)

    GPIO_STATUS=$(echo "$iSMART_CMD" | grep GPIO | awk '{print $NF}')

    # Quoted comparison inside [[ ]]: an empty or multi-valued status no
    # longer triggers a "[" syntax error; it simply does not equal "0x01"
    # and is therefore reported as faulty, exactly as before.
    if [[ "$GPIO_STATUS" == "0x01" ]]; then
        exit 0
    else
        _error_msg "faulty"
        exit 1
    fi
fi

# None of the known markers exist: exit non-zero without a message.
exit 1

platform/broadcom/sonic-platform-modules-dell/s6100/scripts/s6100_platform.sh

+2
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,8 @@ if [[ "$1" == "init" ]]; then
4343
/usr/local/bin/platform_watchdog_disable.sh
4444
fi
4545

46+
systemctl start --no-block s6100-ssd-upgrade-status.service
47+
4648
is_fast_warm=$(cat /proc/cmdline | grep SONIC_BOOT_TYPE | wc -l)
4749

4850
if [[ "$is_fast_warm" == "1" ]]; then
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
#!/bin/bash
#
# SSD health check run by s6100-ssd-monitor.service.
#
# While the GPIO7_high marker exists under $SSD_FW_UPGRADE, the live GPIO
# status is read through iSMART. A status other than 0x01 is logged as a
# fault, the marker is replaced by GPIO7_low and the monitor timer is
# stopped. Without the GPIO7_high marker the timer is stopped straight away.

SSD_FW_UPGRADE="/host/ssd_fw_upgrade"

if [[ -e "$SSD_FW_UPGRADE/GPIO7_high" ]]; then
    iSMART="/usr/local/bin/iSMART_64"
    iSMART_OPTIONS="-d /dev/sda"

    # iSMART_OPTIONS is left unquoted on purpose so it splits into
    # separate arguments ("-d" and "/dev/sda").
    iSMART_CMD=$($iSMART $iSMART_OPTIONS)
    GPIO_STATUS=$(echo "$iSMART_CMD" | grep GPIO | awk '{print $NF}')

    # A usable status is exactly one whitespace-free token.
    single_token='^[^[:space:]]+$'

    if [[ ! "$GPIO_STATUS" =~ $single_token ]]; then
        # Previously this case made the unquoted "[" test fail with a
        # syntax error, which silently skipped the check. Keep the same
        # outcome (no state change) but make it explicit and visible.
        logger -p user.warning -t DELL_S6100_SSD_MON "Unable to read the SSD GPIO status from iSMART; skipping this check."
    elif [[ "$GPIO_STATUS" != "0x01" ]]; then
        logger -p user.crit -t DELL_S6100_SSD_MON "The SSD on this unit is faulty. Do not power-cycle/reboot this unit!"
        logger -p user.crit -t DELL_S6100_SSD_MON "soft-/fast-/warm-reboot is allowed."
        # Replace whatever marker is present with GPIO7_low.
        rm -rf -- "${SSD_FW_UPGRADE:?}"/GPIO7_*
        touch "$SSD_FW_UPGRADE/GPIO7_low"
        systemctl stop s6100-ssd-monitor.timer
    fi
else
    systemctl stop s6100-ssd-monitor.timer
fi
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
1+
#!/bin/bash
#
# Records the SSD firmware upgrade status for the Dell S6100.
#
# Combines the firmware version and model reported by smartctl, the GPIO
# status reported by iSMART and two status values read through io_rd_wr.py,
# then leaves exactly one GPIO7_* marker file under $SSD_FW_UPGRADE:
#   GPIO7_high            - GPIO status 0x01; the monitor timer is started
#   GPIO7_low             - SSD reported as faulty
#   GPIO7_error           - upgraded to an unknown firmware version
#   GPIO7_pending_upgrade - the upgrade did not happen
# Details are appended to $SSD_FW_UPGRADE/upgrade.log.
#
# All comparisons use quoted operands inside [[ ]] so that missing tool
# output cannot turn a test into a "[" syntax error.

SSD_FW_UPGRADE="/host/ssd_fw_upgrade"

# GPIO7_high marker already present: only make sure the monitor is running.
if [[ -e "$SSD_FW_UPGRADE/GPIO7_high" ]]; then
    systemctl start --no-block s6100-ssd-monitor.timer
    exit 0
fi

# A fault or error is already recorded: nothing more to do.
if [[ -e "$SSD_FW_UPGRADE/GPIO7_low" || -e "$SSD_FW_UPGRADE/GPIO7_error" ]]; then
    exit 0
fi

[[ -d "$SSD_FW_UPGRADE" ]] || mkdir "$SSD_FW_UPGRADE"

SSD_UPGRADE_LOG="$SSD_FW_UPGRADE/upgrade.log"

# Append one timestamped line to the upgrade log.
# Arguments: $1 - message text
log_upgrade() {
    echo "$0 $(date) $1" >> "$SSD_UPGRADE_LOG"
}

# Remove every existing GPIO7_* marker and create the requested one.
# Arguments: $1 - marker file name (e.g. GPIO7_low)
set_marker() {
    rm -rf -- "${SSD_FW_UPGRADE:?}"/GPIO7_*
    touch "$SSD_FW_UPGRADE/$1"
}

# Send the two-line "SSD is faulty" notice to syslog.
log_fault_to_syslog() {
    logger -p user.crit -t DELL_S6100_SSD_MON "The SSD on this unit is faulty. Do not power-cycle/reboot this unit!"
    logger -p user.crit -t DELL_S6100_SSD_MON "soft-/fast-/warm-reboot is allowed."
}

# Write 06 to offset 210 and the given value to offset 211, then read
# offset 212 and print the last field of each output line.
# Arguments: $1 - value written to offset 211
read_smf_status() {
    local raw
    raw=$(io_rd_wr.py --set --val 06 --offset 210; io_rd_wr.py --set --val "$1" --offset 211; io_rd_wr.py --get --offset 212)
    echo "$raw" | awk '{print $NF}'
}

SMART_CMD=$(smartctl -a /dev/sda)

SSD_FW_VERSION=$(echo "$SMART_CMD" | grep "Firmware Version" | awk '{print $NF}')
SSD_FW_VERSION=${SSD_FW_VERSION,,}
SSD_MODEL=$(echo "$SMART_CMD" | grep "Device Model" | awk '{print $NF}')

if [[ -e "$SSD_FW_UPGRADE/GPIO7_pending_upgrade" ]]; then
    if [[ "$SSD_MODEL" == "3IE" && "$SSD_FW_VERSION" == "s141002c" ]]; then
        # If SSD Firmware is not upgraded
        exit 0
    fi
    if [[ "$SSD_FW_VERSION" == "s16425c1" || "$SSD_FW_VERSION" == "s16425cq" ]]; then
        # If SSD Firmware is not upgraded
        exit 0
    fi
fi

log_upgrade "SSD FW upgrade logs post reboot."

iSMART="/usr/local/bin/iSMART_64"
iSMART_OPTIONS="-d /dev/sda"
# iSMART_OPTIONS is left unquoted on purpose so it splits into separate
# arguments ("-d" and "/dev/sda").
iSMART_CMD=$($iSMART $iSMART_OPTIONS)

SSD_UPGRADE_STATUS1=$(read_smf_status 09)
SSD_UPGRADE_STATUS2=$(read_smf_status 0A)

if [[ "$SSD_UPGRADE_STATUS1" == "2" ]]; then
    set_marker GPIO7_error

    log_upgrade "Upgraded to unknown version after first mp_64 upgrade."

elif [[ "$SSD_MODEL" == "3IE3" && "$SSD_UPGRADE_STATUS2" == "2" ]]; then
    set_marker GPIO7_error

    log_upgrade "Upgraded to unknown version after second mp_64 upgrade."

elif [[ "$SSD_FW_VERSION" == "s210506g" || "$SSD_FW_VERSION" == "s16425cg" ]]; then
    # If SSD Firmware is upgraded
    GPIO_STATUS=$(echo "$iSMART_CMD" | grep GPIO | awk '{print $NF}')

    # A usable status is exactly one whitespace-free token.
    single_token='^[^[:space:]]+$'

    # NOTE(review): an empty or multi-valued GPIO status used to make the
    # unquoted "[" test error out and fall through to the else branch. That
    # outcome is kept here on purpose; confirm whether an unreadable status
    # should instead be recorded as a fault.
    if [[ "$GPIO_STATUS" =~ $single_token && "$GPIO_STATUS" != "0x01" ]]; then
        log_fault_to_syslog
        set_marker GPIO7_low
        log_upgrade "The SSD on this unit is faulty. Do not power-cycle/reboot this unit!"
        log_upgrade "soft-/fast-/warm-reboot is allowed."

    else
        if [[ "$SSD_UPGRADE_STATUS1" == "0" ]]; then
            set_marker GPIO7_high
            systemctl start --no-block s6100-ssd-monitor.timer

            if [[ "$SSD_MODEL" == "3IE" ]]; then
                log_upgrade "SSD FW upgraded from S141002C to S210506G in first mp_64."
            else
                log_upgrade "SSD FW upgraded from S16425c1 to S16425cG in first mp_64."
            fi
        elif [[ "$SSD_MODEL" == "3IE3" && "$SSD_UPGRADE_STATUS2" == "1" ]]; then
            set_marker GPIO7_low
            log_fault_to_syslog

            log_upgrade "SSD entered loader mode in first mp_64 and upgraded to latest version after second mp_64."
        fi
    fi

else
    if [[ "$SSD_UPGRADE_STATUS1" == "ff" && "$SSD_UPGRADE_STATUS2" == "ff" ]]; then
        set_marker GPIO7_pending_upgrade

        log_upgrade "SSD upgrade didn’t happen."

    elif [[ "$SSD_UPGRADE_STATUS1" == "1" ]]; then
        set_marker GPIO7_low
        log_fault_to_syslog

        log_upgrade "SSD entered loader mode in first mp_64 upgrade."

        if [[ "$SSD_MODEL" == "3IE3" && "$SSD_UPGRADE_STATUS2" == "0" ]]; then
            log_upgrade "SSD entered loader mode in first mp_64 and recovered back to older version in second mp_64."
        fi
    fi

fi

# Always append the raw inputs to the log.
log_upgrade "SMF Register 1 = $SSD_UPGRADE_STATUS1"
log_upgrade "SMF Register 2 = $SSD_UPGRADE_STATUS2"
echo "$SMART_CMD" >> "$SSD_UPGRADE_LOG"
echo "$iSMART_CMD" >> "$SSD_UPGRADE_LOG"
sync
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
fast-reboot_plugin
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
[Unit]
2+
Description=Dell S6100 SSD monitoring poller
3+
DefaultDependencies=no
4+
5+
[Service]
6+
User=root
7+
ExecStart=/usr/local/bin/s6100_ssd_mon.sh
8+
RemainAfterExit=no
9+
10+
[Install]
11+
WantedBy=multi-user.target
12+
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
[Unit]
2+
Description=Dell S6100 SSD monitoring poller timer
3+
DefaultDependencies=no
4+
After=pmon.service
5+
6+
[Timer]
7+
OnBootSec=5min
8+
OnUnitActiveSec=60min
9+
10+
[Install]
11+
WantedBy=timers.target
12+
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
[Unit]
2+
Description= Checking Dell S6100 SSD upgrade status
3+
After=pmon.service
4+
DefaultDependencies=no
5+
6+
[Service]
7+
User=root
8+
Type=oneshot
9+
ExecStart=/usr/local/bin/s6100_ssd_upgrade_status.sh
10+
RemainAfterExit=no
11+
12+
[Install]
13+
WantedBy=multi-user.target
14+

0 commit comments

Comments
 (0)