Skip to content

Commit 087176e

Browse files
stephenxs authored and mssonicbld committed
[Mellanox] Advance hw-mgmt to v.7.0020.4104 (sonic-net#13372)
- Why I did it: Advance the hw-mgmt service to V.7.0020.4104. Add missing thermal sensors that are supported by the hw-mgmt package. Delay the system-health service until hw-mgmt has started on Mellanox platforms, in order to avoid reading some sensors before they are ready. Depends on sonic-net/sonic-linux-kernel#305. - How I did it: 1. Update the hw-mgmt version. 2. Add the missing sensors. 3. Delay the service. - How to verify it: Regression test. Signed-off-by: Stephen Sun <[email protected]>
1 parent 1d155b8 commit 087176e

File tree

11 files changed

+78
-46
lines changed

11 files changed

+78
-46
lines changed

device/mellanox/x86_64-mlnx_msn2010-r0/platform.json

+3
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,9 @@
6767
},
6868
{
6969
"name": "CPU Core 3 Temp"
70+
},
71+
{
72+
"name": "SODIMM 1 Temp"
7073
}
7174
],
7275
"sfps": [

device/mellanox/x86_64-mlnx_msn2100-r0/platform.json

+3
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,9 @@
6767
},
6868
{
6969
"name": "CPU Core 3 Temp"
70+
},
71+
{
72+
"name": "SODIMM 1 Temp"
7073
}
7174
],
7275
"sfps": [

device/mellanox/x86_64-mlnx_msn2410-r0/platform.json

+3
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,9 @@
114114
},
115115
{
116116
"name": "CPU Pack Temp"
117+
},
118+
{
119+
"name": "SODIMM 1 Temp"
117120
}
118121
],
119122
"sfps": [

device/mellanox/x86_64-mlnx_msn2700-r0/platform.json

+3
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,9 @@
114114
},
115115
{
116116
"name": "CPU Pack Temp"
117+
},
118+
{
119+
"name": "SODIMM 1 Temp"
117120
}
118121
],
119122
"sfps": [

device/mellanox/x86_64-nvidia_sn2201-r0/platform.json

+3
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,9 @@
101101
},
102102
{
103103
"name": "ASIC"
104+
},
105+
{
106+
"name": "SODIMM 1 Temp"
104107
}
105108
],
106109
"sfps": [{

platform/mellanox/hw-management.mk

+1-1
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
#
1717
# Mellanox HW Management
1818

19-
MLNX_HW_MANAGEMENT_VERSION = 7.0020.3006
19+
MLNX_HW_MANAGEMENT_VERSION = 7.0020.4104
2020

2121
export MLNX_HW_MANAGEMENT_VERSION
2222

Original file line numberDiff line numberDiff line change
@@ -1,16 +1,16 @@
1-
From 1a1011b6da491d35001df5a7204d4eecb2769767 Mon Sep 17 00:00:00 2001
1+
From 489764eb124e03087eb408dec27d769fa4f98459 Mon Sep 17 00:00:00 2001
22
From: keboliu <[email protected]>
33
Date: Fri, 15 Jan 2021 14:41:16 +0800
4-
Subject: [PATCH] Make SONiC determine-reboot-cause service start after hw-mgmt
5-
service
4+
Subject: [PATCH 1/4] Make SONiC determine-reboot-cause service start after
5+
hw-mgmt service
66

77
Signed-off-by: Kebo Liu <[email protected]>
88
---
99
debian/hw-management.hw-management.service | 1 +
1010
1 file changed, 1 insertion(+)
1111

1212
diff --git a/debian/hw-management.hw-management.service b/debian/hw-management.hw-management.service
13-
index 39a2a54..2104b87 100755
13+
index 8bdcaef..1c25ffb 100755
1414
--- a/debian/hw-management.hw-management.service
1515
+++ b/debian/hw-management.hw-management.service
1616
@@ -1,6 +1,7 @@
@@ -22,5 +22,5 @@ index 39a2a54..2104b87 100755
2222
[Service]
2323
Type=oneshot
2424
--
25-
1.9.1
25+
2.20.1
2626

Original file line numberDiff line numberDiff line change
@@ -1,59 +1,47 @@
1-
From 79dadd5b0d2f5e860b525c12d4d3843607b03a9f Mon Sep 17 00:00:00 2001
1+
From 422b64397f2f33b394d037820f0ceb4c09e3a725 Mon Sep 17 00:00:00 2001
22
From: Alexander Allen <[email protected]>
33
Date: Fri, 21 Jan 2022 16:47:19 +0000
4-
Subject: [PATCH] Disable hw-mgmt on SimX platforms
4+
Subject: [PATCH 2/4] Disable hw-mgmt on SimX platforms
55

66
---
7-
usr/usr/bin/hw-management-ready.sh | 31 ++++++++++++++++--------------
7+
usr/usr/bin/hw-management-ready.sh | 11 +++++++----
88
usr/usr/bin/hw-management.sh | 9 +++++++++
9-
2 files changed, 26 insertions(+), 14 deletions(-)
9+
2 files changed, 16 insertions(+), 4 deletions(-)
1010

1111
diff --git a/usr/usr/bin/hw-management-ready.sh b/usr/usr/bin/hw-management-ready.sh
12-
index 5a9698c..364f906 100755
12+
index 88672a8..7558c68 100755
1313
--- a/usr/usr/bin/hw-management-ready.sh
1414
+++ b/usr/usr/bin/hw-management-ready.sh
15-
@@ -51,19 +51,22 @@ if [ -d /var/run/hw-management ]; then
15+
@@ -51,17 +51,20 @@ if [ -d /var/run/hw-management ]; then
1616
rm -fr /var/run/hw-management
1717
fi
1818

1919
-case $board_type in
2020
-VMOD0014)
21-
- while [ ! -d /sys/devices/pci0000:00/0000:00:1f.0/NVSN2201:00/mlxreg-hotplug/hwmon ]
22-
- do
23-
- sleep 1
24-
- done
25-
- ;;
21+
+if [ -z "$(lspci -vvv | grep SimX)" ]; then
22+
+ case $board_type in
23+
+ VMOD0014)
24+
if [ ! -d /sys/devices/pci0000:00/0000:00:1f.0/NVSN2201:00/mlxreg-hotplug/hwmon ]; then
25+
timeout 180 bash -c 'until [ -d /sys/devices/pci0000:00/0000:00:1f.0/NVSN2201:00/mlxreg-hotplug/hwmon ]; do sleep 0.2; done'
26+
fi
27+
;;
2628
-*)
27-
- while [ ! -d /sys/devices/platform/mlxplat/mlxreg-hotplug/hwmon ]
28-
- do
29-
- sleep 1
30-
- done
31-
- ;;
29+
+ *)
30+
if [ ! -d /sys/devices/platform/mlxplat/mlxreg-hotplug/hwmon ]; then
31+
timeout 180 bash -c 'until [ -d /sys/devices/platform/mlxplat/mlxreg-hotplug/hwmon ]; do sleep 0.2; done'
32+
fi
33+
;;
3234
-esac
33-
+if [ -z "$(lspci -vvv | grep SimX)" ]; then
34-
+ case $board_type in
35-
+ VMOD0014)
36-
+ while [ ! -d /sys/devices/pci0000:00/0000:00:1f.0/NVSN2201:00/mlxreg-hotplug/hwmon ]
37-
+ do
38-
+ sleep 1
39-
+ done
40-
+ ;;
41-
+ *)
42-
+ while [ ! -d /sys/devices/platform/mlxplat/mlxreg-hotplug/hwmon ]
43-
+ do
44-
+ sleep 1
45-
+ done
46-
+ ;;
47-
+ esac
35+
+ esac
4836
+fi
4937
+
5038
echo "Start Chassis HW management service."
5139
logger -t hw-management -p daemon.notice "Start Chassis HW management service."
5240
diff --git a/usr/usr/bin/hw-management.sh b/usr/usr/bin/hw-management.sh
53-
index ebfabb0..c0c038e 100755
41+
index 1ee05b5..50d922b 100755
5442
--- a/usr/usr/bin/hw-management.sh
5543
+++ b/usr/usr/bin/hw-management.sh
56-
@@ -1495,6 +1495,13 @@ do_chip_down()
44+
@@ -2310,6 +2310,13 @@ do_chip_down()
5745
/usr/bin/hw-management-thermal-events.sh change hotplug_asic down %S %p
5846
}
5947

@@ -67,7 +55,7 @@ index ebfabb0..c0c038e 100755
6755
__usage="
6856
Usage: $(basename "$0") [Options]
6957

70-
@@ -1520,6 +1527,8 @@ Options:
58+
@@ -2335,6 +2342,8 @@ Options:
7159
force-reload Performs hw-management 'stop' and the 'start.
7260
"
7361

@@ -77,5 +65,5 @@ index ebfabb0..c0c038e 100755
7765
start)
7866
if [ -d /var/run/hw-management ]; then
7967
--
80-
2.17.1
68+
2.20.1
8169

platform/mellanox/hw-management/0003-Remove-unused-non-upstream-kernel-modules-from-load.patch

+4-4
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,14 @@
1-
From 14b06a12802fc0e15116a64f419d002d0d21d695 Mon Sep 17 00:00:00 2001
1+
From 439639e939f896f9aee42a4dbd5216feb728220c Mon Sep 17 00:00:00 2001
22
From: Alexander Allen <[email protected]>
33
Date: Thu, 17 Feb 2022 04:19:50 +0000
4-
Subject: [PATCH] Remove unused non-upstream kernel modules from load
4+
Subject: [PATCH 3/4] Remove unused non-upstream kernel modules from load
55

66
---
77
usr/etc/modules-load.d/05-hw-management-modules.conf | 2 --
88
1 file changed, 2 deletions(-)
99

1010
diff --git a/usr/etc/modules-load.d/05-hw-management-modules.conf b/usr/etc/modules-load.d/05-hw-management-modules.conf
11-
index 39f621e..c0980bc 100644
11+
index cfcfaa4..dd3b5ca 100644
1212
--- a/usr/etc/modules-load.d/05-hw-management-modules.conf
1313
+++ b/usr/etc/modules-load.d/05-hw-management-modules.conf
1414
@@ -15,8 +15,6 @@ xdpe12284
@@ -21,5 +21,5 @@ index 39f621e..c0980bc 100644
2121
gpio-pca953x
2222
pmbus
2323
--
24-
2.17.1
24+
2.20.1
2525

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
From 038bce6bf808ec9d082e96fec4184e060b3a85a9 Mon Sep 17 00:00:00 2001
2+
From: Stephen Sun <[email protected]>
3+
Date: Mon, 28 Nov 2022 03:55:14 +0000
4+
Subject: [PATCH 4/4] Make system-health service starts after hw-management to
5+
avoid failures
6+
7+
On SN2410, it can fail to read the file led_status_capability if it starts from ONIE
8+
9+
Signed-off-by: Stephen Sun <[email protected]>
10+
---
11+
debian/hw-management.hw-management.service | 2 +-
12+
1 file changed, 1 insertion(+), 1 deletion(-)
13+
14+
diff --git a/debian/hw-management.hw-management.service b/debian/hw-management.hw-management.service
15+
index 1c25ffb..0fbd877 100755
16+
--- a/debian/hw-management.hw-management.service
17+
+++ b/debian/hw-management.hw-management.service
18+
@@ -1,7 +1,7 @@
19+
[Unit]
20+
Description=Chassis HW management service of Mellanox systems
21+
Documentation=man:hw-management.service(8)
22+
-Before=determine-reboot-cause.service
23+
+Before=determine-reboot-cause.service system-health.service
24+
25+
[Service]
26+
Type=oneshot
27+
--
28+
2.20.1
29+
Submodule hw-mgmt updated 70 files

0 commit comments

Comments
 (0)