Skip to content

Commit aeb7074

Browse files
ganglyupull[bot]
authored and committed
Share image for gnmi and telemetry (#16863)
Why I did it: Share a Docker image to support both the gnmi container and the telemetry container. Work item tracking: Microsoft ADO 25423918. How I did it: Create the telemetry image from the gnmi Docker image. Enable the gnmi container and disable the telemetry container by default. How to verify it: Run the end-to-end test.
1 parent 329e7dd commit aeb7074

File tree

24 files changed

+334
-30
lines changed

24 files changed

+334
-30
lines changed

Makefile.work

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -543,6 +543,7 @@ SONIC_BUILD_INSTRUCTION := $(MAKE) \
543543
DOCKER_LOCKFILE_SAVE=$(DOCKER_LOCKFILE_SAVE) \
544544
SONIC_CONFIG_USE_NATIVE_DOCKERD_FOR_BUILD=$(SONIC_CONFIG_USE_NATIVE_DOCKERD_FOR_BUILD) \
545545
SONIC_INCLUDE_SYSTEM_TELEMETRY=$(INCLUDE_SYSTEM_TELEMETRY) \
546+
SONIC_INCLUDE_SYSTEM_GNMI=$(INCLUDE_SYSTEM_GNMI) \
546547
INCLUDE_DHCP_RELAY=$(INCLUDE_DHCP_RELAY) \
547548
INCLUDE_DHCP_SERVER=$(INCLUDE_DHCP_SERVER) \
548549
INCLUDE_MACSEC=$(INCLUDE_MACSEC) \
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
{% from "dockers/dockerfile-macros.j2" import install_debian_packages, install_python_wheels, copy_files %}
2+
FROM docker-config-engine-bullseye-{{DOCKER_USERNAME}}:{{DOCKER_USERTAG}}
3+
4+
ARG docker_container_name
5+
ARG image_version
6+
7+
## Make apt-get non-interactive
8+
ENV DEBIAN_FRONTEND=noninteractive
9+
10+
# Pass the image_version to container
11+
ENV IMAGE_VERSION=$image_version
12+
13+
RUN apt-get update
14+
15+
{% if docker_sonic_gnmi_debs.strip() -%}
16+
# Copy locally-built Debian package dependencies
17+
{{ copy_files("debs/", docker_sonic_gnmi_debs.split(' '), "/debs/") }}
18+
19+
# Install locally-built Debian packages and implicitly install their dependencies
20+
{{ install_debian_packages(docker_sonic_gnmi_debs.split(' ')) }}
21+
{%- endif %}
22+
23+
# Clean apt caches, remove packages that are no longer needed, and delete the staged /debs directory
RUN apt-get clean -y && \
24+
apt-get autoclean -y && \
25+
apt-get autoremove -y && \
26+
rm -rf /debs
27+
28+
COPY ["start.sh", "gnmi-native.sh", "dialout.sh", "/usr/bin/"]
29+
COPY ["telemetry_vars.j2", "/usr/share/sonic/templates/"]
30+
COPY ["supervisord.conf", "/etc/supervisor/conf.d/"]
31+
COPY ["files/supervisor-proc-exit-listener", "/usr/bin"]
32+
COPY ["critical_processes", "/etc/supervisor"]
33+
34+
ENTRYPOINT ["/usr/local/bin/supervisord"]
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
###############################################################################
2+
## Monit configuration for gnmi container
3+
###############################################################################
4+
check program container_memory_gnmi with path "/usr/bin/memory_checker gnmi 419430400"
5+
if status == 3 for 10 times within 20 cycles then exec "/usr/bin/restart_service gnmi" repeat every 2 cycles
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
program:gnmi-native

dockers/docker-sonic-gnmi/dialout.sh

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
#!/usr/bin/env bash
2+
3+
# Start with default config
4+
export CVL_SCHEMA_PATH=/usr/sbin/schema
5+
exec /usr/sbin/dialout_client_cli -insecure -logtostderr -v 2
6+
Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
#!/usr/bin/env bash
2+
3+
EXIT_TELEMETRY_VARS_FILE_NOT_FOUND=1
4+
INCORRECT_TELEMETRY_VALUE=2
5+
TELEMETRY_VARS_FILE=/usr/share/sonic/templates/telemetry_vars.j2
6+
7+
if [ ! -f "$TELEMETRY_VARS_FILE" ]; then
8+
echo "Telemetry vars template file not found"
9+
exit $EXIT_TELEMETRY_VARS_FILE_NOT_FOUND
10+
fi
11+
12+
# Try to read telemetry and certs config from ConfigDB.
13+
# Use default value if no valid config exists
14+
TELEMETRY_VARS=$(sonic-cfggen -d -t $TELEMETRY_VARS_FILE)
15+
TELEMETRY_VARS=${TELEMETRY_VARS//[\']/\"}
16+
X509=$(echo $TELEMETRY_VARS | jq -r '.x509')
17+
GNMI=$(echo $TELEMETRY_VARS | jq -r '.gnmi')
18+
CERTS=$(echo $TELEMETRY_VARS | jq -r '.certs')
19+
20+
TELEMETRY_ARGS=" -logtostderr"
21+
export CVL_SCHEMA_PATH=/usr/sbin/schema
22+
23+
if [ -n "$CERTS" ]; then
24+
SERVER_CRT=$(echo $CERTS | jq -r '.server_crt')
25+
SERVER_KEY=$(echo $CERTS | jq -r '.server_key')
26+
if [ -z $SERVER_CRT ] || [ -z $SERVER_KEY ]; then
27+
TELEMETRY_ARGS+=" --insecure"
28+
else
29+
TELEMETRY_ARGS+=" --server_crt $SERVER_CRT --server_key $SERVER_KEY "
30+
fi
31+
32+
CA_CRT=$(echo $CERTS | jq -r '.ca_crt')
33+
if [ ! -z $CA_CRT ]; then
34+
TELEMETRY_ARGS+=" --ca_crt $CA_CRT"
35+
fi
36+
elif [ -n "$X509" ]; then
37+
SERVER_CRT=$(echo $X509 | jq -r '.server_crt')
38+
SERVER_KEY=$(echo $X509 | jq -r '.server_key')
39+
if [ -z $SERVER_CRT ] || [ -z $SERVER_KEY ]; then
40+
TELEMETRY_ARGS+=" --insecure"
41+
else
42+
TELEMETRY_ARGS+=" --server_crt $SERVER_CRT --server_key $SERVER_KEY "
43+
fi
44+
45+
CA_CRT=$(echo $X509 | jq -r '.ca_crt')
46+
if [ ! -z $CA_CRT ]; then
47+
TELEMETRY_ARGS+=" --ca_crt $CA_CRT"
48+
fi
49+
else
50+
TELEMETRY_ARGS+=" --noTLS"
51+
fi
52+
53+
# If no gnmi configuration entry exists, fall back to the default port 8080
54+
if [ -z "$GNMI" ]; then
55+
PORT=8080
56+
else
57+
PORT=$(echo $GNMI | jq -r '.port')
58+
fi
59+
TELEMETRY_ARGS+=" --port $PORT"
60+
61+
CLIENT_AUTH=$(echo $GNMI | jq -r '.client_auth')
62+
if [ -z $CLIENT_AUTH ] || [ $CLIENT_AUTH == "false" ]; then
63+
TELEMETRY_ARGS+=" --allow_no_client_auth"
64+
fi
65+
66+
LOG_LEVEL=$(echo $GNMI | jq -r '.log_level')
67+
if [[ $LOG_LEVEL =~ ^[0-9]+$ ]]; then
68+
TELEMETRY_ARGS+=" -v=$LOG_LEVEL"
69+
else
70+
TELEMETRY_ARGS+=" -v=2"
71+
fi
72+
73+
# Enable ZMQ for SmartSwitch
74+
LOCALHOST_SUBTYPE=`sonic-db-cli CONFIG_DB hget localhost "subtype"`
75+
if [[ x"${LOCALHOST_SUBTYPE}" == x"SmartSwitch" ]]; then
76+
TELEMETRY_ARGS+=" -zmq_address=tcp://127.0.0.1:8100"
77+
fi
78+
79+
# Server will handle threshold connections consecutively
80+
THRESHOLD_CONNECTIONS=$(echo $GNMI | jq -r '.threshold')
81+
if [[ $THRESHOLD_CONNECTIONS =~ ^[0-9]+$ ]]; then
82+
TELEMETRY_ARGS+=" --threshold $THRESHOLD_CONNECTIONS"
83+
else
84+
if [ -z "$GNMI" ] || [[ $THRESHOLD_CONNECTIONS == "null" ]]; then
85+
TELEMETRY_ARGS+=" --threshold 100"
86+
else
87+
echo "Incorrect threshold value, expecting positive integers" >&2
88+
exit $INCORRECT_TELEMETRY_VALUE
89+
fi
90+
fi
91+
92+
# Close idle connections after certain duration (in seconds)
93+
IDLE_CONN_DURATION=$(echo $GNMI | jq -r '.idle_conn_duration')
94+
if [[ $IDLE_CONN_DURATION =~ ^[0-9]+$ ]]; then
95+
TELEMETRY_ARGS+=" --idle_conn_duration $IDLE_CONN_DURATION"
96+
else
97+
if [ -z "$GNMI" ] || [[ $IDLE_CONN_DURATION == "null" ]]; then
98+
TELEMETRY_ARGS+=" --idle_conn_duration 5"
99+
else
100+
echo "Incorrect idle_conn_duration value, expecting positive integers" >&2
101+
exit $INCORRECT_TELEMETRY_VALUE
102+
fi
103+
fi
104+
105+
exec /usr/sbin/telemetry ${TELEMETRY_ARGS}

dockers/docker-sonic-gnmi/start.sh

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
#!/usr/bin/env bash
2+
3+
if [ "${RUNTIME_OWNER}" == "" ]; then
4+
RUNTIME_OWNER="kube"
5+
fi
6+
7+
CTR_SCRIPT="/usr/share/sonic/scripts/container_startup.py"
8+
if test -f ${CTR_SCRIPT}
9+
then
10+
${CTR_SCRIPT} -f gnmi -o ${RUNTIME_OWNER} -v ${IMAGE_VERSION}
11+
fi
12+
13+
mkdir -p /var/sonic
14+
echo "# Config files managed by sonic-config-engine" > /var/sonic/config_status
15+
16+
TZ=$(cat /etc/timezone)
17+
rm -rf /etc/localtime
18+
ln -sf /usr/share/zoneinfo/$TZ /etc/localtime
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
[supervisord]
2+
logfile_maxbytes=1MB
3+
logfile_backups=2
4+
nodaemon=true
5+
6+
[eventlistener:dependent-startup]
7+
command=python3 -m supervisord_dependent_startup
8+
autostart=true
9+
autorestart=unexpected
10+
startretries=0
11+
exitcodes=0,3
12+
events=PROCESS_STATE
13+
buffer_size=1024
14+
15+
[eventlistener:supervisor-proc-exit-listener]
16+
command=/usr/bin/supervisor-proc-exit-listener --container-name gnmi
17+
events=PROCESS_STATE_EXITED,PROCESS_STATE_RUNNING
18+
autostart=true
19+
autorestart=false
20+
buffer_size=1024
21+
22+
[program:rsyslogd]
23+
command=/usr/sbin/rsyslogd -n -iNONE
24+
priority=1
25+
autostart=false
26+
autorestart=true
27+
stdout_logfile=syslog
28+
stderr_logfile=syslog
29+
dependent_startup=true
30+
31+
[program:start]
32+
command=/usr/bin/start.sh
33+
priority=2
34+
autostart=false
35+
autorestart=false
36+
startsecs=0
37+
stdout_logfile=syslog
38+
stderr_logfile=syslog
39+
dependent_startup=true
40+
dependent_startup_wait_for=rsyslogd:running
41+
42+
[program:gnmi-native]
43+
command=/usr/bin/gnmi-native.sh
44+
priority=3
45+
autostart=false
46+
autorestart=false
47+
stdout_logfile=syslog
48+
stderr_logfile=syslog
49+
dependent_startup=true
50+
dependent_startup_wait_for=start:exited
51+
52+
[program:dialout]
53+
command=/usr/bin/dialout.sh
54+
priority=4
55+
autostart=false
56+
autorestart=false
57+
stdout_logfile=syslog
58+
stderr_logfile=syslog
59+
dependent_startup=true
60+
dependent_startup_wait_for=gnmi-native:running
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
{
2+
"certs": {% if "certs" in GNMI.keys() %}{{ GNMI["certs"] }}{% else %}""{% endif %},
3+
"gnmi" : {% if "gnmi" in GNMI.keys() %}{{ GNMI["gnmi"] }}{% else %}""{% endif %},
4+
"x509" : {% if "x509" in DEVICE_METADATA.keys() %}{{ DEVICE_METADATA["x509"] }}{% else %}""{% endif %}
5+
}

dockers/docker-sonic-telemetry/Dockerfile.j2

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
{% from "dockers/dockerfile-macros.j2" import install_debian_packages, install_python_wheels, copy_files %}
2-
FROM docker-config-engine-bullseye-{{DOCKER_USERNAME}}:{{DOCKER_USERTAG}}
2+
FROM docker-sonic-gnmi-{{DOCKER_USERNAME}}:{{DOCKER_USERTAG}}
33

44
ARG docker_container_name
55
ARG image_version
@@ -28,7 +28,6 @@ RUN apt-get clean -y && \
2828
COPY ["start.sh", "telemetry.sh", "dialout.sh", "/usr/bin/"]
2929
COPY ["telemetry_vars.j2", "/usr/share/sonic/templates/"]
3030
COPY ["supervisord.conf", "/etc/supervisor/conf.d/"]
31-
COPY ["files/supervisor-proc-exit-listener", "/usr/bin"]
3231
COPY ["critical_processes", "/etc/supervisor"]
3332

3433
ENTRYPOINT ["/usr/local/bin/supervisord"]

dockers/docker-sonic-telemetry/telemetry.sh

Lines changed: 3 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -70,18 +70,12 @@ else
7070
TELEMETRY_ARGS+=" -v=2"
7171
fi
7272

73-
# Enable ZMQ for SmartSwitch
74-
LOCALHOST_SUBTYPE=`sonic-db-cli CONFIG_DB hget localhost "subtype"`
75-
if [[ x"${LOCALHOST_SUBTYPE}" == x"SmartSwitch" ]]; then
76-
TELEMETRY_ARGS+=" -zmq_address=tcp://127.0.0.1:8100"
77-
fi
78-
7973
# Server will handle threshold connections consecutively
8074
THRESHOLD_CONNECTIONS=$(echo $GNMI | jq -r '.threshold')
8175
if [[ $THRESHOLD_CONNECTIONS =~ ^[0-9]+$ ]]; then
8276
TELEMETRY_ARGS+=" --threshold $THRESHOLD_CONNECTIONS"
8377
else
84-
if [ -z $GNMI ] || [[ $THRESHOLD_CONNECTIONS == "null" ]]; then
78+
if [ -z "$GNMI" ] || [[ $THRESHOLD_CONNECTIONS == "null" ]]; then
8579
TELEMETRY_ARGS+=" --threshold 100"
8680
else
8781
echo "Incorrect threshold value, expecting positive integers" >&2
@@ -94,13 +88,13 @@ IDLE_CONN_DURATION=$(echo $GNMI | jq -r '.idle_conn_duration')
9488
if [[ $IDLE_CONN_DURATION =~ ^[0-9]+$ ]]; then
9589
TELEMETRY_ARGS+=" --idle_conn_duration $IDLE_CONN_DURATION"
9690
else
97-
if [ -z $GNMI ] || [[ $IDLE_CONN_DURATION == "null" ]]; then
91+
if [ -z "$GNMI" ] || [[ $IDLE_CONN_DURATION == "null" ]]; then
9892
TELEMETRY_ARGS+=" --idle_conn_duration 5"
9993
else
10094
echo "Incorrect idle_conn_duration value, expecting positive integers" >&2
10195
exit $INCORRECT_TELEMETRY_VALUE
10296
fi
10397
fi
104-
98+
TELEMETRY_ARGS+=" -gnmi_native_write=false"
10599

106100
exec /usr/sbin/telemetry ${TELEMETRY_ARGS}

files/build_templates/gnmi.service.j2

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
[Unit]
2+
Description=GNMI container
3+
Requires=database.service
4+
After=database.service swss.service syncd.service
5+
Before=ntp-config.service
6+
BindsTo=sonic.target
7+
After=sonic.target
8+
StartLimitIntervalSec=1200
9+
StartLimitBurst=3
10+
11+
[Service]
12+
User={{ sonicadmin_user }}
13+
ExecStartPre=/usr/local/bin/{{docker_container_name}}.sh start
14+
ExecStart=/usr/local/bin/{{docker_container_name}}.sh wait
15+
ExecStop=/usr/local/bin/{{docker_container_name}}.sh stop
16+
RestartSec=30

files/build_templates/init_cfg.json.j2

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@
5454
{%- if include_restapi == "y" %}{% do features.append(("restapi", "enabled", false, "enabled")) %}{% endif %}
5555
{%- if include_sflow == "y" %}{% do features.append(("sflow", "disabled", true, "enabled")) %}{% endif %}
5656
{%- if include_macsec == "y" %}{% do features.append(("macsec", "{% if 'type' in DEVICE_METADATA['localhost'] and DEVICE_METADATA['localhost']['type'] == 'SpineRouter' and DEVICE_RUNTIME_METADATA['MACSEC_SUPPORTED'] %}enabled{% else %}disabled{% endif %}", false, "enabled")) %}{% endif %}
57+
{%- if include_system_gnmi == "y" %}{% do features.append(("gnmi", "enabled", true, "enabled")) %}{% endif %}
5758
{%- if include_system_telemetry == "y" %}{% do features.append(("telemetry", "enabled", true, "enabled")) %}{% endif %}
5859
"FEATURE": {
5960
{# delayed field if set, will start the feature systemd .timer unit instead of .service unit #}
@@ -76,7 +77,7 @@
7677
"check_up_status" : "false",
7778
{%- endif %}
7879
{%- if include_kubernetes == "y" %}
79-
{%- if feature in ["lldp", "pmon", "radv", "eventd", "snmp", "telemetry"] %}
80+
{%- if feature in ["lldp", "pmon", "radv", "eventd", "snmp", "telemetry", "gnmi"] %}
8081
"set_owner": "kube", {% else %}
8182
"set_owner": "local", {% endif %} {% endif %}
8283
"high_mem_alert": "disabled"

files/build_templates/sonic_debian_extension.j2

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -909,6 +909,7 @@ sudo LANG=C cp $SCRIPTS_DIR/radv.sh $FILESYSTEM_ROOT/usr/local/bin/radv.sh
909909
sudo LANG=C cp $SCRIPTS_DIR/database.sh $FILESYSTEM_ROOT/usr/local/bin/database.sh
910910
sudo LANG=C cp $SCRIPTS_DIR/snmp.sh $FILESYSTEM_ROOT/usr/local/bin/snmp.sh
911911
sudo LANG=C cp $SCRIPTS_DIR/telemetry.sh $FILESYSTEM_ROOT/usr/local/bin/telemetry.sh
912+
sudo LANG=C cp $SCRIPTS_DIR/gnmi.sh $FILESYSTEM_ROOT/usr/local/bin/gnmi.sh
912913
sudo LANG=C cp $SCRIPTS_DIR/mgmt-framework.sh $FILESYSTEM_ROOT/usr/local/bin/mgmt-framework.sh
913914
sudo LANG=C cp $SCRIPTS_DIR/asic_status.sh $FILESYSTEM_ROOT/usr/local/bin/asic_status.sh
914915
sudo LANG=C cp $SCRIPTS_DIR/asic_status.py $FILESYSTEM_ROOT/usr/local/bin/asic_status.py

files/image_config/logrotate/rsyslog.j2

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
/var/log/syslog
2929
/var/log/teamd.log
3030
/var/log/telemetry.log
31+
/var/log/gnmi.log
3132
/var/log/frr/bgpd.log
3233
/var/log/frr/zebra.log
3334
/var/log/swss/sairedis*.rec

files/image_config/monit/container_checker

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -58,9 +58,9 @@ def get_expected_running_containers():
5858
for container_name in feature_table.keys():
5959
if feature_table[container_name]["state"] not in ["disabled", "always_disabled"]:
6060
if multi_asic.is_multi_asic():
61-
if feature_table[container_name]["has_global_scope"] == "True":
61+
if feature_table[container_name].get("has_global_scope", "True") == "True":
6262
expected_running_containers.add(container_name)
63-
if feature_table[container_name]["has_per_asic_scope"] == "True":
63+
if feature_table[container_name].get("has_per_asic_scope", "False") == "True":
6464
num_asics = multi_asic.get_num_asics()
6565
for asic_id in range(num_asics):
6666
if asic_id in asics_id_presence or container_name in run_all_instance_list:
@@ -69,9 +69,9 @@ def get_expected_running_containers():
6969
expected_running_containers.add(container_name)
7070
if feature_table[container_name]["state"] == 'always_enabled':
7171
if multi_asic.is_multi_asic():
72-
if feature_table[container_name]["has_global_scope"] == "True":
72+
if feature_table[container_name].get("has_global_scope", "True") == "True":
7373
always_running_containers.add(container_name)
74-
if feature_table[container_name]["has_per_asic_scope"] == "True":
74+
if feature_table[container_name].get("has_per_asic_scope", "False") == "True":
7575
num_asics = multi_asic.get_num_asics()
7676
for asic_id in range(num_asics):
7777
if asic_id in asics_id_presence or container_name in run_all_instance_list:

files/image_config/rsyslog/rsyslog.d/00-sonic.conf

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,12 @@ if $programname contains "teamd_" then {
3232
stop
3333
}
3434

35+
## gnmi rules
36+
if $msg startswith " gnmi-native" then {
37+
/var/log/gnmi.log
38+
stop
39+
}
40+
3541
## telemetry rules
3642
if $msg startswith " telemetry" or ($msg startswith " dialout" )then {
3743
/var/log/telemetry.log

files/scripts/gnmi.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
service_mgmt.sh

0 commit comments

Comments
 (0)