Skip to content

Commit ff32ac3

Browse files
authored
[Auto Techsupport] Event driven Techsupport Changes (#8670)
#### Why I did it Changes required for feature "Event Driven TechSupport Invocation & CoreDump Mgmt". [HLD](sonic-net/SONiC#818) Requires: sonic-net/sonic-utilities#1796. Merging in any order would be fine. Summary of the changes: - Added the YANG Models for the new tables introduced as part of this feature. - Enhanced init_cfg.json with the default config required - Added a compile-time flag which enables/disables the config required for this feature inside the init_cfg.json - Enhanced the supervisor-proc-exit-listener script to populate `<feature>:<critical_proc> = <comm>:<pid>` info in the STATE_DB when it observes a proc exit notification for the critical processes running inside the docker.
1 parent 1c4ca07 commit ff32ac3

File tree

9 files changed

+285
-1
lines changed

9 files changed

+285
-1
lines changed

Makefile.work

+4
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,9 @@
4040
# * SONIC_DPKG_CACHE_SOURCE: Debian package cache location when cache enabled for debian packages
4141
# * BUILD_LOG_TIMESTAMP: Set timestamp in the build log (simple/none)
4242
# * DOCKER_EXTRA_OPTS: Extra command line arguments for dockerd running in slave container.
43+
# * ENABLE_AUTO_TECH_SUPPORT: Enable the configuration for event-driven techsupport & coredump mgmt feature
44+
# * Default: y
45+
# * Values: y,n
4346
#
4447
###############################################################################
4548

@@ -282,6 +285,7 @@ SONIC_BUILD_INSTRUCTION := make \
282285
SONIC_ENABLE_IMAGE_SIGNATURE=$(ENABLE_IMAGE_SIGNATURE) \
283286
ENABLE_HOST_SERVICE_ON_START=$(ENABLE_HOST_SERVICE_ON_START) \
284287
SLAVE_DIR=$(SLAVE_DIR) \
288+
ENABLE_AUTO_TECH_SUPPORT=$(ENABLE_AUTO_TECH_SUPPORT) \
285289
BUILD_MULTIASIC_KVM=$(BUILD_MULTIASIC_KVM) \
286290
$(SONIC_OVERRIDE_BUILD_VARS)
287291

files/build_templates/init_cfg.json.j2

+21
Original file line numberDiff line numberDiff line change
@@ -61,5 +61,26 @@
6161
"high_mem_alert": "disabled"
6262
}{% if not loop.last %},{% endif -%}
6363
{% endfor %}
64+
},
65+
"AUTO_TECHSUPPORT": {
66+
"GLOBAL": {
67+
{%- if enable_auto_tech_support == "y" %}
68+
"state" : "enabled", {% else %}
69+
"state" : "disabled", {% endif %}
70+
"rate_limit_interval" : "180",
71+
"max_techsupport_limit" : "10.0",
72+
"max_core_limit" : "5.0",
73+
"since" : "2 days ago"
74+
}
75+
},
76+
"AUTO_TECHSUPPORT_FEATURE": {
77+
{%- for feature, _, _, _ in features %}
78+
"{{feature}}": {
79+
{%- if enable_auto_tech_support == "y" %}
80+
"state" : "enabled", {% else %}
81+
"state" : "disabled", {% endif %}
82+
"rate_limit_interval" : "600"
83+
}{%if not loop.last %},{% endif -%}
84+
{% endfor %}
6485
}
6586
}

rules/config

+3
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,9 @@ INCLUDE_NAT = y
144144
# INCLUDE_DHCP_RELAY - build and install dhcp-relay package
145145
INCLUDE_DHCP_RELAY = y
146146

147+
# ENABLE_AUTO_TECH_SUPPORT - Enable the configuration for event-driven techsupport & coredump mgmt feature
148+
ENABLE_AUTO_TECH_SUPPORT = y
149+
147150
# TELEMETRY_WRITABLE - Enable write/config operations via the gNMI interface.
148151
# Uncomment to enable:
149152
# TELEMETRY_WRITABLE = y

slave.mk

+6
Original file line numberDiff line numberDiff line change
@@ -163,6 +163,10 @@ ifeq ($(SONIC_INCLUDE_MACSEC),y)
163163
INCLUDE_MACSEC = y
164164
endif
165165

166+
ifeq ($(ENABLE_AUTO_TECH_SUPPORT),y)
167+
ENABLE_AUTO_TECH_SUPPORT = y
168+
endif
169+
166170
ifeq ($(SONIC_INCLUDE_MUX),y)
167171
INCLUDE_MUX = y
168172
endif
@@ -290,6 +294,7 @@ $(info "INCLUDE_KUBERNETES" : "$(INCLUDE_KUBERNETES)")
290294
$(info "INCLUDE_MACSEC" : "$(INCLUDE_MACSEC)")
291295
$(info "INCLUDE_MUX" : "$(INCLUDE_MUX)")
292296
$(info "TELEMETRY_WRITABLE" : "$(TELEMETRY_WRITABLE)")
297+
$(info "ENABLE_AUTO_TECH_SUPPORT" : "$(ENABLE_AUTO_TECH_SUPPORT)")
293298
$(info "PDDF_SUPPORT" : "$(PDDF_SUPPORT)")
294299
$(info "MULTIARCH_QEMU_ENVIRON" : "$(MULTIARCH_QEMU_ENVIRON)")
295300
$(info "SONIC_VERSION_CONTROL_COMPONENTS": "$(SONIC_VERSION_CONTROL_COMPONENTS)")
@@ -987,6 +992,7 @@ $(addprefix $(TARGET_PATH)/, $(SONIC_INSTALLERS)) : $(TARGET_PATH)/% : \
987992
export include_restapi="$(INCLUDE_RESTAPI)"
988993
export include_nat="$(INCLUDE_NAT)"
989994
export include_sflow="$(INCLUDE_SFLOW)"
995+
export enable_auto_tech_support="$(ENABLE_AUTO_TECH_SUPPORT)"
990996
export include_macsec="$(INCLUDE_MACSEC)"
991997
export include_mgmt_framework="$(INCLUDE_MGMT_FRAMEWORK)"
992998
export include_iccpd="$(INCLUDE_ICCPD)"

src/sonic-yang-models/setup.py

+1
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,7 @@ def run(self):
7878
cmdclass={'build_py': my_build_py},
7979
data_files=[
8080
('yang-models', ['./yang-models/sonic-acl.yang',
81+
'./yang-models/sonic-auto_techsupport.yang',
8182
'./yang-models/sonic-bgp-common.yang',
8283
'./yang-models/sonic-bgp-global.yang',
8384
'./yang-models/sonic-bgp-neighbor.yang',

src/sonic-yang-models/tests/files/sample_config_db.json

+27-1
Original file line numberDiff line numberDiff line change
@@ -1152,6 +1152,33 @@
11521152
"trap_group": "queue1_group1"
11531153
}
11541154
},
1155+
"AUTO_TECHSUPPORT": {
1156+
"GLOBAL": {
1157+
"state" : "enabled",
1158+
"rate_limit_interval" : "180",
1159+
"max_techsupport_limit" : "10.0",
1160+
"max_core_limit" : "0.0",
1161+
"since" : "2 days ago"
1162+
}
1163+
},
1164+
"AUTO_TECHSUPPORT_FEATURE": {
1165+
"bgp" :{
1166+
"state" : "disabled",
1167+
"rate_limit_interval" : "600"
1168+
},
1169+
"swss" : {
1170+
"state" : "enabled",
1171+
"rate_limit_interval" : "600"
1172+
},
1173+
"snmp" : {
1174+
"state" : "enabled",
1175+
"rate_limit_interval" : "500"
1176+
},
1177+
"dhcp_relay" : {
1178+
"state" : "disabled",
1179+
"rate_limit_interval" : "1000"
1180+
}
1181+
},
11551182
"LLDP": {
11561183
"GLOBAL": {
11571184
"mode": "TRANSMIT",
@@ -1371,7 +1398,6 @@
13711398
}
13721399
}
13731400
},
1374-
13751401
"SAMPLE_CONFIG_DB_UNKNOWN": {
13761402
"UNKNOWN_TABLE": {
13771403
"Error": "This Table is for testing, This Table does not have YANG models."
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
{
2+
"AUTO_TECHSUPPORT_TEST":{
3+
"desc": "Configure auto techsupport params in AUTO_TECHSUPPORT Table"
4+
},
5+
"AUTO_TECHSUPPORT_WRONG_STATE_VALUE": {
6+
"desc": "Configure state key with invalid value",
7+
"eStrKey": "InvalidValue"
8+
},
9+
"AUTO_TECHSUPPORT_INVALID_RATE_LIMIT_FORMAT": {
10+
"desc" : "Configure rate_limit_interval with a value of invalid format",
11+
"eStrKey": "InvalidValue"
12+
},
13+
"AUTO_TECHSUPPORT_OUT_OF_RANGE_DECIMAL": {
14+
"desc" : "Configure a value for core-usage outside the range [0, 100)",
15+
"eStr": "Value \"100.00\" does not satisfy the constraint \"0..99.99\" (range, length, or pattern)."
16+
},
17+
"AUTO_TECHSUPPORT_VALID_DECIMAL_VALUE": {
18+
"desc" : "Configure a value for max_techsupport_limit inside the range [0, 100)"
19+
},
20+
"AUTO_TECHSUPPORT_INVALID_FRACTION_DIGITS": {
21+
"desc" : "Configure a value for max_techsupport_limit inside the range [0, 100) but with 3 fractional digits",
22+
"eStrKey": "InvalidValue"
23+
},
24+
"AUTO_TECHSUPPORT_RATE_LIMIT_INTERVAL_TEST": {
25+
"desc" : "Configure and test the valid configuration"
26+
}
27+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
{
2+
"AUTO_TECHSUPPORT_TEST": {
3+
"sonic-auto_techsupport:sonic-auto_techsupport": {
4+
"sonic-auto_techsupport:AUTO_TECHSUPPORT": {
5+
"sonic-auto_techsupport:GLOBAL": {
6+
"state" : "enabled",
7+
"rate_limit_interval" : "180",
8+
"max_techsupport_limit" : "10.0",
9+
"max_core_limit" : "5.0",
10+
"since" : "2 days ago"
11+
}
12+
}
13+
}
14+
},
15+
"AUTO_TECHSUPPORT_WRONG_STATE_VALUE": {
16+
"sonic-auto_techsupport:sonic-auto_techsupport": {
17+
"sonic-auto_techsupport:AUTO_TECHSUPPORT": {
18+
"sonic-auto_techsupport:GLOBAL": {
19+
"state" : "start",
20+
"rate_limit_interval" : "180",
21+
"max_techsupport_limit" : "10.0",
22+
"max_core_limit" : "5.0",
23+
"since" : "2 days ago"
24+
}
25+
}
26+
}
27+
},
28+
"AUTO_TECHSUPPORT_INVALID_RATE_LIMIT_FORMAT": {
29+
"sonic-auto_techsupport:sonic-auto_techsupport": {
30+
"sonic-auto_techsupport:AUTO_TECHSUPPORT": {
31+
"sonic-auto_techsupport:GLOBAL": {
32+
"rate_limit_interval" : "whatever"
33+
}
34+
}
35+
}
36+
},
37+
"AUTO_TECHSUPPORT_OUT_OF_RANGE_DECIMAL": {
38+
"sonic-auto_techsupport:sonic-auto_techsupport": {
39+
"sonic-auto_techsupport:AUTO_TECHSUPPORT": {
40+
"sonic-auto_techsupport:GLOBAL": {
41+
"max_core_limit" : "100.00",
42+
"rate_limit_interval" : "180"
43+
}
44+
}
45+
}
46+
},
47+
"AUTO_TECHSUPPORT_VALID_DECIMAL_VALUE": {
48+
"sonic-auto_techsupport:sonic-auto_techsupport": {
49+
"sonic-auto_techsupport:AUTO_TECHSUPPORT": {
50+
"sonic-auto_techsupport:GLOBAL": {
51+
"max_techsupport_limit" : "11.23",
52+
"max_core_limit" : "99.99"
53+
}
54+
}
55+
}
56+
},
57+
"AUTO_TECHSUPPORT_INVALID_FRACTION_DIGITS": {
58+
"sonic-auto_techsupport:sonic-auto_techsupport": {
59+
"sonic-auto_techsupport:AUTO_TECHSUPPORT": {
60+
"sonic-auto_techsupport:GLOBAL": {
61+
"max_techsupport_limit" : "11.111",
62+
"max_core_limit" : "99.99"
63+
}
64+
}
65+
}
66+
},
67+
"AUTO_TECHSUPPORT_RATE_LIMIT_INTERVAL_TEST": {
68+
"sonic-auto_techsupport:sonic-auto_techsupport": {
69+
"sonic-auto_techsupport:AUTO_TECHSUPPORT_FEATURE": {
70+
"AUTO_TECHSUPPORT_FEATURE_LIST": [
71+
{
72+
"feature_name" : "bgp",
73+
"state" : "enabled",
74+
"rate_limit_interval" : "600"
75+
},
76+
{
77+
"feature_name" : "swss",
78+
"state" : "disabled",
79+
"rate_limit_interval" : "400"
80+
}
81+
]
82+
}
83+
}
84+
}
85+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,111 @@
1+
module sonic-auto_techsupport {
2+
3+
yang-version 1.1;
4+
5+
namespace "http://github.com/Azure/sonic-auto_techsupport";
6+
prefix auto_techsupport;
7+
8+
import sonic-types {
9+
prefix stypes;
10+
}
11+
12+
description "Event Driven Techsupport & CoreDump Mgmt Capability in SONiC OS";
13+
14+
revision 2021-08-09 {
15+
description "First Revision";
16+
}
17+
18+
typedef decimal-repr {
19+
type decimal64 {
20+
fraction-digits 2;
21+
range 0.0..99.99;
22+
}
23+
}
24+
25+
container sonic-auto_techsupport {
26+
27+
container AUTO_TECHSUPPORT {
28+
29+
description "AUTO_TECHSUPPORT part of config_db.json";
30+
31+
container GLOBAL {
32+
33+
leaf state {
34+
description "Knob to make techsupport invocation event-driven based on core-dump generation";
35+
type stypes:admin_mode;
36+
}
37+
38+
leaf rate_limit_interval {
39+
description "Minimum time in seconds between two successive techsupport invocations. Configure 0 to explicitly disable";
40+
type uint16;
41+
}
42+
43+
leaf max_techsupport_limit {
44+
/*
45+
A value between 0.0-99.99 should be specified.
46+
The actual value in bytes is calculated based on the available space in the filesystem hosting /var/dump
47+
When the limit is crossed, the older dump files are incrementally deleted
48+
*/
49+
description "Max Limit in percentage for the cumulative size of ts dumps. No cleanup is performed if the value isn't configured or is 0.0";
50+
type decimal-repr;
51+
}
52+
53+
leaf max_core_limit {
54+
/*
55+
A value between 0.0-99.99 should be specified.
56+
The actual value in bytes is calculated based on the available space in the filesystem hosting /var/core
57+
When the limit is crossed, the older core files are incrementally deleted
58+
*/
59+
description "Max Limit in percentage for the cumulative size of core dumps. No cleanup is performed if the value isn't configured or is 0.0";
60+
type decimal-repr;
61+
}
62+
63+
leaf since {
64+
/*
65+
Any valid date string of the formats specified here (https://www.gnu.org/software/coreutils/manual/html_node/Date-input-formats.html)
66+
can be used.
67+
*/
68+
description "Only collect the logs & core-dumps generated since the time provided. A default value of '2 days ago' is used if this value is not set explicitly or a non-valid string is provided";
69+
type string {
70+
length 1..255;
71+
}
72+
}
73+
}
74+
/* end of container GLOBAL */
75+
}
76+
/* end of container AUTO_TECHSUPPORT */
77+
78+
container AUTO_TECHSUPPORT_FEATURE {
79+
80+
description "AUTO_TECHSUPPORT_FEATURE part of config_db.json";
81+
82+
list AUTO_TECHSUPPORT_FEATURE_LIST {
83+
84+
key "feature_name";
85+
86+
leaf feature_name {
87+
description "The name of this feature";
88+
/* TODO: Leafref once the FEATURE YANG is added*/
89+
type string {
90+
length 1..255;
91+
}
92+
}
93+
94+
leaf state {
95+
description "Enable auto techsupport invocation on the processes running inside this feature";
96+
type stypes:admin_mode;
97+
}
98+
99+
leaf rate_limit_interval {
100+
description "Rate limit interval for the corresponding feature. Configure 0 to explicitly disable";
101+
type uint16;
102+
}
103+
104+
}
105+
/* end of AUTO_TECHSUPPORT_FEATURE_LIST */
106+
}
107+
/* end of container AUTO_TECHSUPPORT_FEATURE */
108+
}
109+
/* end of top level container */
110+
}
111+

0 commit comments

Comments
 (0)