2
2
import pytest
3
3
4
4
from collections import namedtuple , Counter
5
+ from tests .platform_tests .counterpoll .cpu_memory_helper import restore_counter_poll # lgtm [py/unused-import]
6
+ from tests .platform_tests .counterpoll .cpu_memory_helper import counterpoll_type # lgtm [py/unused-import]
7
+ from tests .platform_tests .counterpoll .counterpoll_helper import ConterpollHelper
8
+ from tests .platform_tests .counterpoll .counterpoll_constants import CounterpollConstants
9
+ from tests .common .mellanox_data import is_mellanox_device
5
10
6
11
7
12
pytestmark = [
@@ -22,6 +27,7 @@ def setup_thresholds(duthosts, enum_rand_one_per_hwsku_hostname):
22
27
high_cpu_consume_procs ['syncd' ] = 80
23
28
return memory_threshold , cpu_threshold , high_cpu_consume_procs
24
29
30
+
25
31
def test_cpu_memory_usage (duthosts , enum_rand_one_per_hwsku_hostname , setup_thresholds ):
26
32
"""Check DUT memory usage and process cpu usage are within threshold."""
27
33
duthost = duthosts [enum_rand_one_per_hwsku_hostname ]
@@ -35,36 +41,172 @@ def test_cpu_memory_usage(duthosts, enum_rand_one_per_hwsku_hostname, setup_thre
35
41
outstanding_procs_counter = Counter ()
36
42
for i , monit_result in enumerate (MonitResult (* _ ) for _ in monit_results ):
37
43
logging .debug ("------ Iteration %d ------" , i )
38
- if monit_result .memory ['used_percent' ] > memory_threshold :
39
- logging .debug ("system memory usage exceeds %d%%: %s" ,
40
- memory_threshold , monit_result .memory )
41
- outstanding_mem_polls [i ] = monit_result .memory
44
+ check_memory (i , memory_threshold , monit_result , outstanding_mem_polls )
42
45
for proc in monit_result .processes :
43
46
cpu_threshold = normal_cpu_threshold
44
- if high_cpu_consume_procs . has_key ( proc ['name' ]) :
45
- cpu_threshold = high_cpu_consume_procs [proc ['name' ]]
46
- if proc [ 'cpu_percent' ] >= cpu_threshold :
47
- logging . debug ( "process %s(%d) cpu usage exceeds %d%%." ,
48
- proc [ 'name' ], proc [ 'pid' ], cpu_threshold )
49
- outstanding_procs [ proc [ 'pid' ]] = proc [ 'name' ]
50
- outstanding_procs_counter [ proc [ 'pid' ]] += 1
47
+ if proc ['name' ] in high_cpu_consume_procs :
48
+ cpu_threshold = high_cpu_consume_procs [proc ['name' ]]
49
+ check_cpu_usage ( cpu_threshold , outstanding_procs , outstanding_procs_counter , proc )
50
+
51
+ analyse_monitoring_results ( cpu_threshold , memory_threshold , outstanding_mem_polls , outstanding_procs ,
52
+ outstanding_procs_counter , persist_threshold )
53
+
51
54
55
def analyse_monitoring_results(cpu_threshold, memory_threshold, outstanding_mem_polls, outstanding_procs,
                               outstanding_procs_counter, persist_threshold):
    """Fail the test when memory or persistent CPU overuse was recorded.

    Args:
        cpu_threshold: CPU percentage limit, used only in the log message.
        memory_threshold: Memory percentage limit, used only in the log message.
        outstanding_mem_polls: Mapping of polls whose memory usage exceeded the
            limit; any non-empty value triggers a failure.
        outstanding_procs: Mapping of pid -> process name for offenders.
        outstanding_procs_counter: Counter of pid -> number of polls in which
            the process exceeded its CPU limit.
        persist_threshold: A pid is "persistent" only when its count is
            strictly greater than this value.
    """
    # most_common() is ordered by descending count, so filtering keeps the
    # same pids, in the same order, as stopping at the first small count.
    persistent_pids = [
        pid
        for pid, hits in outstanding_procs_counter.most_common()
        if hits > persist_threshold
    ]

    if not outstanding_mem_polls and not persistent_pids:
        return

    if outstanding_mem_polls:
        logging.error("system memory usage exceeds %d%%", memory_threshold)
    if persistent_pids:
        offender_names = [outstanding_procs[pid] for pid in persistent_pids]
        logging.error(
            "processes that persistently exceeds cpu usage %d%%: %s",
            cpu_threshold,
            offender_names
        )
    pytest.fail("system cpu and memory usage check fails")
72
+
73
+
74
@pytest.fixture(scope='module')
def counterpoll_cpu_threshold(duthosts, request):
    """Provide the per-counterpoll-type CPU usage limits.

    The only entry, "port-buffer-drop", is read from the
    --port_buffer_drop_cpu_usage_threshold command line option.
    """
    port_buffer_drop_limit = request.config.getoption("--port_buffer_drop_cpu_usage_threshold")
    return {"port-buffer-drop": port_buffer_drop_limit}
78
+
79
+
80
def test_cpu_memory_usage_counterpoll(duthosts, enum_rand_one_per_hwsku_hostname,
                                      setup_thresholds, restore_counter_poll, counterpoll_type,
                                      counterpoll_cpu_threshold):
    """Check memory and CPU usage while a single counterpoll type is enabled.

    Steps:
      1. Skip when no program to monitor is defined for this DUT.
      2. Disable every counterpoll type except the one under test.
      3. Collect 60 monitoring samples, one second apart.
      4. Compare the memory samples with the memory threshold.
      5. Compare the average CPU usage of the monitored program around its
         per-interval peaks with the threshold for the tested counterpoll type.
    Restoring the counterpoll state is left to the restore_counter_poll fixture.
    """
    duthost = duthosts[enum_rand_one_per_hwsku_hostname]
    program_to_check = get_manufacturer_program_to_check(duthost)
    if program_to_check is None:
        pytest.skip("Skip no program is offered to check")

    # Only the memory limit of the shared thresholds is needed here.
    memory_threshold, _cpu_limit, _high_cpu_procs = setup_thresholds
    counterpoll_cpu_usage_threshold = counterpoll_cpu_threshold[counterpoll_type]

    MonitResult = namedtuple('MonitResult', ['processes', 'memory'])
    disable_all_counterpoll_type_except_tested(duthost, counterpoll_type)
    monit_output = duthost.monit_process(iterations=60, delay_interval=1)
    monit_results = monit_output['monit_results']
    # Interval is stored in milliseconds; samples are taken once per second.
    poll_interval = CounterpollConstants.COUNTERPOLL_INTERVAL[counterpoll_type] // 1000

    outstanding_mem_polls = {}
    outstanding_procs = {}
    outstanding_procs_counter = Counter()
    cpu_usage_program_to_check = []

    prepare_ram_cpu_usage_results(
        MonitResult, counterpoll_cpu_usage_threshold, memory_threshold, monit_results,
        outstanding_mem_polls, outstanding_procs, outstanding_procs_counter,
        program_to_check, cpu_usage_program_to_check)

    log_cpu_usage_by_vendor(cpu_usage_program_to_check, counterpoll_type)

    peak_indexes = extract_valid_cpu_usage_data(cpu_usage_program_to_check, poll_interval)
    cpu_usage_average = caculate_cpu_usge_average_value(peak_indexes, cpu_usage_program_to_check)
    logging.info("Average cpu_usage is {}".format(cpu_usage_average))

    cpu_failure = "cpu_usage_average of {} exceeds the cpu threshold:{}".format(
        program_to_check, counterpoll_cpu_usage_threshold)
    assert cpu_usage_average < counterpoll_cpu_usage_threshold, cpu_failure

    memory_failure = " Memory {} exceeds the memory threshold {} ".format(
        outstanding_mem_polls, memory_threshold)
    assert not outstanding_mem_polls, memory_failure
118
+
119
+
120
def log_cpu_usage_by_vendor(cpu_usage_program_to_check, counterpoll_type):
    """Log the collected CPU usage samples; do nothing when there are none."""
    if not cpu_usage_program_to_check:
        return
    summary = 'CPU usage for counterpoll type {} : {}'.format(counterpoll_type, cpu_usage_program_to_check)
    logging.info(summary)
123
+
124
+
125
def get_manufacturer_program_to_check(duthost):
    """Return the program whose CPU usage should be monitored on this DUT.

    Only Mellanox devices have a program defined (CounterpollConstants.SX_SDK);
    for every other device None is returned.
    """
    if not is_mellanox_device(duthost):
        return None
    return CounterpollConstants.SX_SDK
128
+
129
+
130
def prepare_ram_cpu_usage_results(MonitResult, cpu_threshold, memory_threshold, monit_results, outstanding_mem_polls,
                                  outstanding_procs, outstanding_procs_counter, program_to_check,
                                  program_to_check_cpu_usage):
    """Walk the monitoring samples and collect memory and CPU data.

    For each sample, memory overuse is recorded in outstanding_mem_polls and
    the CPU usage of program_to_check is appended to program_to_check_cpu_usage.
    cpu_threshold, outstanding_procs and outstanding_procs_counter are accepted
    but not used by this function.
    """
    for iteration, raw_sample in enumerate(monit_results):
        sample = MonitResult(*raw_sample)
        logging.debug("------ Iteration %d ------", iteration)
        check_memory(iteration, memory_threshold, sample, outstanding_mem_polls)
        for process in sample.processes:
            update_cpu_usage_desired_program(process, program_to_check, program_to_check_cpu_usage)
138
+
139
+
140
def extract_valid_cpu_usage_data(program_to_check_cpu_usage, poll_interval):
    """Return the indexes of the CPU usage peaks that are safe to average.

    The samples are split into consecutive windows of ``poll_interval``
    samples; trailing samples that do not fill a whole window are ignored.

    1. Find the index of the maximum in every window (first one on ties).
    2. Discard the peak if it sits on the edge of the whole sample list
       (index 0 or the last index), since it has no neighbour on one side.
    3. If the peak directly follows the previously kept peak (the same spike
       straddling two windows), keep only the earlier one.
    4. Return all kept indexes.

    For example, with poll_interval = 10:
        7, 1, 0, 1, 0, 1, 5, 1, 1, 2, 0, 1, 0, 1, 0, 6, 1, 1, 1, 2
        returns [15]
        0, 1, 0, 1, 0, 1, 0, 1, 0, 8, 7, 1, 0, 1, 0, 6, 1, 1, 1, 2
        returns [9]

    Args:
        program_to_check_cpu_usage: List of CPU usage samples.
        poll_interval: Number of samples per window; must be positive.

    Returns:
        List of indexes into program_to_check_cpu_usage.

    Raises:
        ValueError: If poll_interval is not positive (for example a
            sub-second counterpoll interval floor-divided down to 0).
    """
    if poll_interval <= 0:
        raise ValueError("poll_interval must be a positive number of samples, got {}".format(poll_interval))

    last_index = len(program_to_check_cpu_usage) - 1
    poll_number = len(program_to_check_cpu_usage) // poll_interval
    valid_cpu_usage_center_index_list = []

    for poll_times in range(poll_number):
        window_start = poll_times * poll_interval
        window = program_to_check_cpu_usage[window_start:window_start + poll_interval]
        # max() returns the first maximal element, matching a strict ">" scan.
        peak_offset = max(range(len(window)), key=window.__getitem__)
        max_cpu_usage_index = window_start + peak_offset

        if max_cpu_usage_index == 0 or max_cpu_usage_index == last_index:
            logging.info("The data is on the edge:{}, discard it ".format(max_cpu_usage_index))
            continue
        if valid_cpu_usage_center_index_list and valid_cpu_usage_center_index_list[-1] + 1 == max_cpu_usage_index:
            # Same spike as the previously kept peak; keep the former only.
            continue
        valid_cpu_usage_center_index_list.append(max_cpu_usage_index)

    return valid_cpu_usage_center_index_list
177
+
178
+
179
def caculate_cpu_usge_average_value(valid_cpu_usage_center_index_list, program_to_check_cpu_usage):
    """Average the CPU usage in a three-sample window around each peak index.

    For every index ``i`` the samples ``i - 1``, ``i`` and ``i + 1`` are
    taken. The window start is clamped at 0 so that a peak at index 0 does
    not turn into a negative (wrap-around) slice, and the average is divided
    by the number of samples actually collected, so a window truncated at
    either end of the list is not diluted.

    Args:
        valid_cpu_usage_center_index_list: Peak indexes into
            program_to_check_cpu_usage.
        program_to_check_cpu_usage: List of CPU usage samples.

    Returns:
        The mean of all collected samples, or 0 when no sample was collected.
    """
    cpu_usage_total = 0.0  # float so the final division is never integer division
    sample_count = 0
    for center in valid_cpu_usage_center_index_list:
        window = program_to_check_cpu_usage[max(center - 1, 0):center + 2]
        cpu_usage_total += sum(window)
        sample_count += len(window)
        logging.info("cpu usage center index:{}: cpu usage:{}".format(center, window))
    return cpu_usage_total / sample_count if sample_count else 0
186
+
187
+
188
def check_cpu_usage(cpu_threshold, outstanding_procs, outstanding_procs_counter, proc):
    """Record a process whose CPU usage reaches or exceeds the threshold.

    Args:
        cpu_threshold: CPU percentage limit for this process.
        outstanding_procs: Mapping updated in place with pid -> process name.
        outstanding_procs_counter: Counter incremented in place for the pid.
        proc: Dict with the 'name', 'pid' and 'cpu_percent' of one process.
    """
    if not proc['cpu_percent'] >= cpu_threshold:
        return
    name, pid = proc['name'], proc['pid']
    logging.debug("process %s(%d) cpu usage exceeds %d%%.", name, pid, cpu_threshold)
    outstanding_procs[pid] = name
    outstanding_procs_counter[pid] += 1
194
+
195
+
196
def update_cpu_usage_desired_program(proc, program_to_check, program_to_check_cpu_usage):
    """Append the CPU usage of proc when it is the monitored program.

    Nothing is appended when program_to_check is empty/None or when the
    process name differs from it.
    """
    is_monitored = bool(program_to_check) and proc['name'] == program_to_check
    if is_monitored:
        program_to_check_cpu_usage.append(proc['cpu_percent'])
200
+
201
+
202
def check_memory(i, memory_threshold, monit_result, outstanding_mem_polls):
    """Record the memory info of a sample whose usage exceeds the threshold.

    Args:
        i: Iteration number of the sample, used as the key of the record.
        memory_threshold: Memory percentage limit (exceeded means strictly greater).
        monit_result: Sample exposing a ``memory`` mapping with 'used_percent'.
        outstanding_mem_polls: Mapping updated in place with i -> memory info.
    """
    memory_info = monit_result.memory
    if not memory_info['used_percent'] > memory_threshold:
        return
    logging.debug("system memory usage exceeds %d%%: %s",
                  memory_threshold, memory_info)
    outstanding_mem_polls[i] = memory_info
207
+
69
208
70
- pytest .fail (failure_message )
209
def disable_all_counterpoll_type_except_tested(duthost, counterpoll_type):
    """Disable every available counterpoll type except the one under test.

    NOTE(review): assumes get_available_counterpoll_types returns a mutable
    list; the tested type is removed from it in place before disabling.
    """
    types_to_disable = ConterpollHelper.get_available_counterpoll_types(duthost)
    # list.remove raises ValueError when the tested type is not available.
    types_to_disable.remove(counterpoll_type)
    ConterpollHelper.disable_counterpoll(duthost, types_to_disable)
0 commit comments