Skip to content

Commit 06f8b1f

Browse files
[auto-ts] add memory check (#10433) (#12291)
#### Why I did it To support automatic techsupport invocation in case memory usage is too high. #### How I did it Implemented according to sonic-net/SONiC#939 #### How to verify it UT, manual test on the switch. *DEPENDS* on sonic-net/sonic-utilities#2116
1 parent 2b36f81 commit 06f8b1f

File tree

5 files changed

+103
-10
lines changed

5 files changed

+103
-10
lines changed

files/build_templates/init_cfg.json.j2

+4-1
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,8 @@
8484
"rate_limit_interval" : "180",
8585
"max_techsupport_limit" : "10.0",
8686
"max_core_limit" : "5.0",
87+
"available_mem_threshold": "10.0",
88+
"min_available_mem": "200",
8789
"since" : "2 days ago"
8890
}
8991
},
@@ -93,7 +95,8 @@
9395
{%- if enable_auto_tech_support == "y" %}
9496
"state" : "enabled", {% else %}
9597
"state" : "disabled", {% endif %}
96-
"rate_limit_interval" : "600"
98+
"rate_limit_interval" : "600",
99+
"available_mem_threshold": "10.0"
97100
}{%if not loop.last %},{% endif -%}
98101
{% endfor %}
99102
},

files/image_config/monit/conf.d/sonic-host

+3
Original file line numberDiff line numberDiff line change
@@ -46,3 +46,6 @@ check program vnetRouteCheck with path "/usr/local/bin/vnet_route_check.py"
4646
every 5 cycles
4747
if status != 0 for 3 cycle then alert repeat every 1 cycles
4848

49+
# memory_check tool that verifies that memory usage does not cross the threshold; if it does, techsupport is invoked.
50+
check program memory_check with path "/usr/local/bin/memory_threshold_check.py"
51+
if status == 2 for 10 times within 20 cycles then exec "/usr/local/bin/memory_threshold_check_handler.py"

src/sonic-yang-models/tests/yang_model_tests/tests/auto_techsupport.json

+16-2
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
},
99
"AUTO_TECHSUPPORT_INVALID_RATE_LIMIT_FORMAT": {
1010
"desc" : "Configure cooloff with a value of invalid format",
11-
"eStrKey": "InvalidValue"
11+
"eStrKey": "InvalidValue"
1212
},
1313
"AUTO_TECHSUPPORT_OUT_OF_RANGE_DECIMAL": {
1414
"desc" : "Configure a value for core-uage outside the range [0, 100)",
@@ -19,9 +19,23 @@
1919
},
2020
"AUTO_TECHSUPPORT_INVALID_FRACTION_DIGITS": {
2121
"desc" : "Configure a value for max_techsupport_size inside the range [0, 100) but with 3 fractional digits",
22-
"eStrKey": "InvalidValue"
22+
"eStrKey": "InvalidValue"
2323
},
2424
"AUTO_TECHSUPPORT_RATE_LIMIT_INTERVAL_TEST": {
2525
"desc" : "Configure and test the valid configuration"
26+
},
27+
"AUTO_TECHSUPPORT_AVAILABLE_MEM_THRESHOLD": {
28+
"desc" : "Configure and test the valid configuration"
29+
},
30+
"AUTO_TECHSUPPORT_INVALID_AVAILABLE_MEM_THRESHOLD": {
31+
"desc" : "Configure a value for available_mem_threshold inside the range [0, 100) but with 3 fractional digits",
32+
"eStrKey": "InvalidValue"
33+
},
34+
"AUTO_TECHSUPPORT_GLOBAL_MEM_THRESHOLD_VALID": {
35+
"desc" : "Configure and test the valid configuration"
36+
},
37+
"AUTO_TECHSUPPORT_GLOBAL_MEM_THRESHOLD_INVALID_THRESHOLD": {
38+
"desc" : "Configure a value for available_mem_threshold inside the range [0, 100) but with 3 fractional digits",
39+
"eStrKey": "InvalidValue"
2640
}
2741
}

src/sonic-yang-models/tests/yang_model_tests/tests_config/auto_techsupport.json

+62-7
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
"max_techsupport_limit" : "10.0",
99
"max_core_limit" : "5.0",
1010
"since" : "2 days ago"
11-
}
11+
}
1212
}
1313
}
1414
},
@@ -20,8 +20,8 @@
2020
"rate_limit_interval" : "180",
2121
"max_techsupport_limit" : "10.0",
2222
"max_core_limit" : "5.0",
23-
"since" : "2 days ago"
24-
}
23+
"since" : "2 days ago"
24+
}
2525
}
2626
}
2727
},
@@ -30,7 +30,7 @@
3030
"sonic-auto_techsupport:AUTO_TECHSUPPORT": {
3131
"sonic-auto_techsupport:GLOBAL": {
3232
"rate_limit_interval" : "whatever"
33-
}
33+
}
3434
}
3535
}
3636
},
@@ -40,7 +40,7 @@
4040
"sonic-auto_techsupport:GLOBAL": {
4141
"max_core_limit" : "100.00",
4242
"rate_limit_interval" : "180"
43-
}
43+
}
4444
}
4545
}
4646
},
@@ -50,7 +50,7 @@
5050
"sonic-auto_techsupport:GLOBAL": {
5151
"max_techsupport_limit" : "11.23",
5252
"max_core_limit" : "99.99"
53-
}
53+
}
5454
}
5555
}
5656
},
@@ -60,7 +60,7 @@
6060
"sonic-auto_techsupport:GLOBAL": {
6161
"max_techsupport_limit" : "11.111",
6262
"max_core_limit" : "99.99"
63-
}
63+
}
6464
}
6565
}
6666
},
@@ -81,5 +81,60 @@
8181
]
8282
}
8383
}
84+
},
85+
"AUTO_TECHSUPPORT_GLOBAL_MEM_THRESHOLD_VALID": {
86+
"sonic-auto_techsupport:sonic-auto_techsupport": {
87+
"sonic-auto_techsupport:AUTO_TECHSUPPORT": {
88+
"sonic-auto_techsupport:GLOBAL": {
89+
"available_mem_threshold": "10.0",
90+
"min_available_mem": "900"
91+
}
92+
}
93+
}
94+
},
95+
"AUTO_TECHSUPPORT_GLOBAL_MEM_THRESHOLD_INVALID_THRESHOLD": {
96+
"sonic-auto_techsupport:sonic-auto_techsupport": {
97+
"sonic-auto_techsupport:AUTO_TECHSUPPORT": {
98+
"sonic-auto_techsupport:GLOBAL": {
99+
"available_mem_threshold": "11.111"
100+
}
101+
}
102+
}
103+
},
104+
"AUTO_TECHSUPPORT_AVAILABLE_MEM_THRESHOLD": {
105+
"sonic-auto_techsupport:sonic-auto_techsupport": {
106+
"sonic-auto_techsupport:AUTO_TECHSUPPORT_FEATURE": {
107+
"AUTO_TECHSUPPORT_FEATURE_LIST": [
108+
{
109+
"feature_name" : "bgp",
110+
"state" : "enabled",
111+
"available_mem_threshold": "10.0"
112+
},
113+
{
114+
"feature_name" : "swss",
115+
"state" : "disabled",
116+
"available_mem_threshold": "10.0"
117+
}
118+
]
119+
}
120+
}
121+
},
122+
"AUTO_TECHSUPPORT_INVALID_AVAILABLE_MEM_THRESHOLD": {
123+
"sonic-auto_techsupport:sonic-auto_techsupport": {
124+
"sonic-auto_techsupport:AUTO_TECHSUPPORT_FEATURE": {
125+
"AUTO_TECHSUPPORT_FEATURE_LIST": [
126+
{
127+
"feature_name" : "bgp",
128+
"state" : "enabled",
129+
"available_mem_threshold": "11.111"
130+
},
131+
{
132+
"feature_name" : "swss",
133+
"state" : "disabled",
134+
"available_mem_threshold": "10.0"
135+
}
136+
]
137+
}
138+
}
84139
}
85140
}

src/sonic-yang-models/yang-models/sonic-auto_techsupport.yang

+18
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,18 @@ module sonic-auto_techsupport {
5959
description "Max Limit in percentage for the cumulative size of core dumps. No cleanup is performed if the value isn't configured or is 0.0";
6060
type decimal-repr;
6161
}
62+
63+
leaf available_mem_threshold {
64+
description "Memory threshold; 0 to disable techsupport invocation on memory usage threshold crossing";
65+
type decimal-repr;
66+
default 10.0;
67+
}
68+
69+
leaf min_available_mem {
70+
description "Minimum Free memory (in MB) that should be available for the techsupport execution to start";
71+
type uint32;
72+
default 200;
73+
}
6274

6375
leaf since {
6476
/*
@@ -96,6 +108,12 @@ module sonic-auto_techsupport {
96108
type stypes:admin_mode;
97109
}
98110

111+
leaf available_mem_threshold {
112+
description "Memory threshold; 0 to disable techsupport invocation on memory usage threshold crossing";
113+
type decimal-repr;
114+
default 10.0;
115+
}
116+
99117
leaf rate_limit_interval {
100118
description "Rate limit interval for the corresponding feature. Configure 0 to explicitly disable";
101119
type uint16;

0 commit comments

Comments
 (0)