Skip to content

Commit 12e7393

Browse files
stepanblyschak and mssonicbld
authored and committed
[nvidia] make sure shared storage with syncd is cleared on restarts (sonic-net#14547)
Why I did it: Sharing the storage of syncd with other proprietary application extensions allows them to communicate with syncd in different ways. If one container wants to pass some information to syncd, then shared storage can be used. However, today the shared storage isn't cleaned on restarts, making it possible for syncd to read out-of-date information generated in the past. NOTE: No plans to use it for standard SONIC dockers and we are working on removing the SDK dependency from PMON docker. How I did it: Implemented new service to clean the shared storage. How to verify it: Do reboot/fast-reboot/warm-reboot/config-reload/systemctl restart swss and verify /tmp/ is cleaned after each restart in syncd container. Signed-off-by: Stepan Blyschak <[email protected]>
1 parent 8210842 commit 12e7393

File tree

4 files changed

+32
-6
lines changed

4 files changed

+32
-6
lines changed

files/build_templates/docker_image_ctl.j2

+7-6
Original file line numberDiff line numberDiff line change
@@ -157,9 +157,9 @@ function waitForAllInstanceDatabaseConfigJsonFilesReady()
157157
done
158158
fi
159159
# Delay a second to allow all instance database_config.json files to be completely generated and fully accessible.
160-
# This delay is needed to make sure that the database_config.json files are correctly rendered from j2 template
160+
# This delay is needed to make sure that the database_config.json files are correctly rendered from j2 template
161161
# files ( renderning takes some time )
162-
sleep 1
162+
sleep 1
163163
fi
164164
}
165165
{%- endif %}
@@ -240,7 +240,7 @@ function postStartAction()
240240
mv $WARM_DIR/dump.rdb $WARM_DIR/dump.rdb.old
241241
else
242242
# If there is a config_db.json dump file, load it.
243-
if [ -r /etc/sonic/config_db$DEV.json ]; then
243+
if [ -r /etc/sonic/config_db$DEV.json ]; then
244244

245245
if [ -r /etc/sonic/init_cfg.json ]; then
246246
$SONIC_CFGGEN -j /etc/sonic/init_cfg.json -j /etc/sonic/config_db$DEV.json --write-to-db
@@ -323,7 +323,7 @@ start() {
323323

324324
{%- if sonic_asic_platform == "broadcom" %}
325325
{%- if docker_container_name == "syncd" %}
326-
# Set the SYNCD_SHM_SIZE if this variable not defined
326+
# Set the SYNCD_SHM_SIZE if this variable not defined
327327
BRCM_PLATFORM_COMMON_DIR=/usr/share/sonic/device/x86_64-broadcom_common
328328
SYNCD_SHM_INI=$BRCM_PLATFORM_COMMON_DIR/syncd_shm.ini
329329

@@ -545,14 +545,15 @@ start() {
545545
{%- if docker_container_name == "syncd" %}
546546
-v /var/log/mellanox:/var/log/mellanox:rw \
547547
-v mlnx_sdk_socket:/var/run/sx_sdk \
548-
-v mlnx_sdk_ready:/tmp \
548+
-v /tmp/nv-syncd-shared/:/tmp \
549549
-v /dev/shm:/dev/shm:rw \
550550
-v /var/log/sai_failure_dump:/var/log/sai_failure_dump:rw \
551551
-e SX_API_SOCKET_FILE=/var/run/sx_sdk/sx_api.sock \
552552
{%- elif docker_container_name == "pmon" %}
553553
-v /var/run/hw-management:/var/run/hw-management:rw \
554554
-v mlnx_sdk_socket:/var/run/sx_sdk \
555-
-v mlnx_sdk_ready:/tmp \
555+
-v /tmp/nv-syncd-shared/:/tmp \
556+
-v /dev/shm:/dev/shm:rw \
556557
-e SX_API_SOCKET_FILE=/var/run/sx_sdk/sx_api.sock \
557558
-v /dev/shm:/dev/shm:rw \
558559
{%- else %}

files/build_templates/per_namespace/syncd.service.j2

+4
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,10 @@ After=updategraph.service
1919
BindsTo=sonic.target
2020
After=sonic.target
2121
Before=ntp-config.service
22+
{% if sonic_asic_platform == 'mellanox' %}
23+
Requires=nv-syncd-shared.service
24+
After=nv-syncd-shared.service
25+
{% endif %}
2226

2327
[Service]
2428
User=root

files/build_templates/sonic_debian_extension.j2

+4
Original file line numberDiff line numberDiff line change
@@ -931,6 +931,10 @@ MLNX_SONIC_PLATFORM_PY3_WHEEL_NAME=$(basename {{mlnx_platform_api_py3_wheel_path
931931
sudo cp {{mlnx_platform_api_py3_wheel_path}} $FILESYSTEM_ROOT/$MLNX_SONIC_PLATFORM_PY3_WHEEL_NAME
932932
sudo https_proxy=$https_proxy LANG=C chroot $FILESYSTEM_ROOT pip3 install $MLNX_SONIC_PLATFORM_PY3_WHEEL_NAME
933933
sudo rm -rf $FILESYSTEM_ROOT/$MLNX_SONIC_PLATFORM_PY3_WHEEL_NAME
934+
935+
# Install service that manages Nvidia specific shared storage
936+
sudo cp platform/mellanox/nv-syncd-shared/nv-syncd-shared.service $FILESYSTEM_ROOT_USR_LIB_SYSTEMD_SYSTEM/
937+
sudo LANG=C chroot $FILESYSTEM_ROOT systemctl enable nv-syncd-shared
934938
{% endif %}
935939

936940
{%- if SONIC_ROUTING_STACK == "frr" %}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
[Unit]
2+
Description=Manage Nvidia specific syncd shared volume
3+
Requires=docker.service
4+
After=docker.service
5+
BindsTo=sonic.target
6+
After=sonic.target
7+
PartOf=syncd.service
8+
9+
[Service]
10+
Type=oneshot
11+
RemainAfterExit=yes
12+
ExecStart=rm -rf /tmp/nv-syncd-shared/
13+
ExecStart=mkdir -p /tmp/nv-syncd-shared/
14+
15+
[Install]
16+
WantedBy=sonic.target
17+

0 commit comments

Comments
 (0)