@@ -113,16 +113,143 @@ def main(path, k, tau, method, cluster_method, param, output, tolclust, tolplace
     total_method_time = time.time()
 
     # Global loop for getting data
-    reader.init_reader(path, use_kafka)
     print(it.csv_reader)
     print(it.avro_deserializer)
-    it.Sentry = True
-    # print('df_indiv1: ',my_instance.df_indiv)
+    # it.s_entry = True
+    current_time = 0
+    # Offline / online separation : TODO in parameters
+    offline_sep = 3
+    my_instance.sep_time = offline_sep
+    tick = config['loop']['tick']
+
+    total_loop_time = 0.0
+    loop_nb = 1
+    nb_clust_changes = 0
+    nb_place_changes = 0
+    total_nb_overload = 0
+
     print('Ready for new data...')
     try:
-        while it.Sentry:
-            print('Getting next data one more timestamp data')
+        while it.s_entry:
+            if current_time < offline_sep:
+                current_data = reader.get_next_data(
+                    current_time, offline_sep, offline_sep + 1, use_kafka
+                )
+                current_time += offline_sep
+                my_instance.df_indiv = current_data
+                my_instance.df_host = current_data.groupby(
+                    [current_data[it.tick_field], current_data[it.host_field]],
+                    as_index=False).agg(it.dict_agg_metrics)
+
+                # Analysis period
+                start = time.time()
+                (my_instance, df_host_evo,
+                 df_indiv_clust, labels_) = analysis_period(
+                    my_instance, config, method
+                )
+                add_time(-1, 'total_t_obs', (time.time() - start))
+
+                cluster_profiles = clt.get_cluster_mean_profile(df_indiv_clust)
+                tmin = my_instance.sep_time - (my_instance.window_duration - 1)
+                tmax = my_instance.sep_time
+
+                # it.results_file.write('Loop mode : %s\n' % mode)
+                logging.info('Beginning the loop process ...\n')
+
+                (working_df_indiv, df_clust, w, u, v) = build_matrices(
+                    my_instance, tmin, tmax, labels_
+                )
+
+                # build initial optimisation model in pre loop using offline data
+                start = time.time()
+                (clust_model, place_model,
+                 clustering_dual_values, placement_dual_values) = pre_loop(
+                    my_instance, working_df_indiv, df_clust,
+                    w, u, config['loop']['constraints_dual'], v,
+                    cluster_method, config['optimization']['solver']
+                )
+                add_time(0, 'total_loop', (time.time() - start))
+                print('ready for loop ?')
+                tmax += tick
+                tmin = tmax - (my_instance.window_duration - 1)
+
+            else:
+                last_index = my_instance.df_indiv.index.levels[0][-1]
+                current_data = reader.get_next_data(
+                    current_time, config['loop']['tick'],
+                    config['loop']['tick'] - current_time + 1, use_kafka
+                )
+                current_time += config['loop']['tick']
+                my_instance.df_indiv = pd.concat(
+                    [my_instance.df_indiv, current_data])
+                new_df_host = current_data.groupby(
+                    [current_data[it.tick_field], it.host_field], as_index=False
+                ).agg(it.dict_agg_metrics)
+                new_df_host = new_df_host.astype({
+                    it.host_field: str,
+                    it.tick_field: int}
+                )
+                previous_timestamp = last_index
+                existing_machine_ids = my_instance.df_host[
+                    my_instance.df_host[it.tick_field] == previous_timestamp
+                ][it.host_field].unique()
+                missing_machine_ids = set(existing_machine_ids) - set(
+                    new_df_host[it.host_field])
+
+                missing_rows = pd.DataFrame({
+                    'timestamp': int(current_time),
+                    'machine_id': list(missing_machine_ids),
+                    'cpu': 0.0
+                })
+                # new_df_host.sort_values(it.tick_field, inplace=True)
+                new_df_host = pd.concat([new_df_host, missing_rows])
+                new_df_host.set_index([it.tick_field, it.host_field], inplace=True, drop=False)
+                my_instance.df_host = pd.concat([
+                    my_instance.df_host, new_df_host
+                ])
+                print(current_data)
+                print(current_time)
+                print(my_instance.df_indiv)
+                print('perform loop')
+                (working_df_indiv, df_clust, w, u, v) = build_matrices(
+                    my_instance, tmin, tmax, labels_
+                )
+                nb_clust_changes_loop = 0
+                nb_place_changes_loop = 0
+                (nb_clust_changes_loop, nb_place_changes_loop,
+                 init_loop_silhouette, end_loop_silhouette,
+                 clust_conf_nodes, clust_conf_edges, clust_max_deg, clust_mean_deg,
+                 place_conf_nodes, place_conf_edges, place_max_deg, place_mean_deg,
+                 clust_model, place_model,
+                 clustering_dual_values, placement_dual_values,
+                 df_clust, cluster_profiles, labels_) = eval_sols(
+                    my_instance, working_df_indiv, cluster_method,
+                    w, u, v, clust_model, place_model,
+                    config['loop']['constraints_dual'],
+                    clustering_dual_values, placement_dual_values,
+                    config['loop']['tol_dual_clust'],
+                    config['loop']['tol_move_clust'],
+                    config['loop']['tol_open_clust'],
+                    config['loop']['tol_dual_place'],
+                    config['loop']['tol_move_place'],
+                    df_clust, cluster_profiles, labels_, loop_nb,
+                    config['optimization']['solver']
+                )
+                it.results_file.write(
+                    'Number of changes in clustering : %d\n' % nb_clust_changes_loop
+                )
+                it.results_file.write(
+                    'Number of changes in placement : %d\n' % nb_place_changes_loop
+                )
+                nb_clust_changes += nb_clust_changes_loop
+                nb_place_changes += nb_place_changes_loop
+                tmax += tick
+                tmin = tmax - (my_instance.window_duration - 1)
+                my_instance.time += 1
+                loop_nb += 1
+                print('success ?')
             input()
+
     finally:
         # Close down consumer to commit final offsets.
         reader.close_reader(use_kafka)
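For reference, the back-fill performed in the `missing_machine_ids` block above (hosts that reported at the previous tick but are absent from the new batch get a zero-CPU placeholder row) can be illustrated with a minimal, self-contained pandas sketch; the column names 'timestamp', 'machine_id' and 'cpu' mirror the literals used in the diff, everything else is illustrative:

import pandas as pd

# Hosts seen at the previous tick but missing from the new batch receive a
# zero-CPU row, so the per-host frame keeps one row per (tick, host) pair.
previous_tick = pd.DataFrame(
    {'timestamp': [4, 4], 'machine_id': ['m1', 'm2'], 'cpu': [0.7, 0.3]})
new_batch = pd.DataFrame(
    {'timestamp': [5], 'machine_id': ['m1'], 'cpu': [0.8]})

missing_ids = set(previous_tick['machine_id']) - set(new_batch['machine_id'])
missing_rows = pd.DataFrame(
    {'timestamp': 5, 'machine_id': sorted(missing_ids), 'cpu': 0.0})
df_host = pd.concat([new_batch, missing_rows], ignore_index=True)
# df_host now holds rows (5, 'm1', 0.8) and (5, 'm2', 0.0)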
@@ -266,7 +393,12 @@ def preprocess(
 
     # Init containers & nodes data, then Instance
     logging.info('Loading data and creating Instance (Instance information are in results file)\n')
-    instance = Instance(path, config, use_kafka)
+    if use_kafka:
+        reader.consume_all_data(config)
+        # reader.delete_kafka_topic(config)
+        reader.csv_to_stream(path, config)
+    reader.init_reader(path, use_kafka)
+    instance = Instance(path, config)
     it.results_file.write('Method used : %s\n' % method)
     instance.print_times(config['loop']['tick'])
@@ -328,8 +460,7 @@ def analysis_period(my_instance, config, method):
             df_indiv[it.tick_field] >= start_point) & (
                 my_instance.df_indiv[it.tick_field] <= end_point)
     ]
-    print('Check Stats', start_point, end_point, working_df_indiv, my_instance.df_host)
-    # working_df_indiv contains info of historical data 1/3 of the data
+
     # First clustering part
     logging.info('Starting first clustering ...')
     print('Starting first clustering ...')
@@ -474,7 +605,7 @@ def run_period(
 def signal_handler_sigint(signal_number, frame):
     """Handle for exiting application via signal."""
     print('Exit application')
-    it.Sentry = False
+    it.s_entry = False
 
 
 def streaming_eval(
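The renamed flag is what the SIGINT handler flips so that the `while it.s_entry:` loops exit on their next check. A self-contained sketch of the same pattern, using a module-level flag instead of the `it` globals:

import signal

s_entry = True

def handle_sigint(signal_number, frame):
    """Ask the main loop to stop on its next iteration."""
    global s_entry
    print('Exit application')
    s_entry = False

signal.signal(signal.SIGINT, handle_sigint)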
@@ -567,7 +698,8 @@ def streaming_eval(
 
     if use_kafka:
         use_schema = False
-        avro_deserializer = reader.connect_schema(use_schema)
+        avro_deserializer = reader.connect_schema(
+            use_schema, it.kafka_schema_url)
         # time_to_send = my_instance.df_indiv['timestamp'].iloc[-1]
         # history = True # consider historical data
         # send last historical data to kafka
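`connect_schema` now receives the registry URL explicitly. A hedged sketch of how a deserializer could be built from that URL, assuming the confluent-kafka Schema Registry client (the helper name and return convention are illustrative):

from confluent_kafka.schema_registry import SchemaRegistryClient
from confluent_kafka.schema_registry.avro import AvroDeserializer

def connect_schema_sketch(use_schema, schema_registry_url):
    """Return an AvroDeserializer bound to the registry, or None when schemas are off."""
    if not use_schema:
        return None
    registry = SchemaRegistryClient({'url': schema_registry_url})
    return AvroDeserializer(registry)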
@@ -585,11 +717,11 @@ def streaming_eval(
     # it.time_at.append(x)
     # it.memory_usage.append(mem_before)
     # it.total_mem_use.append(tot_mem_after)
-    it.Sentry = True
+    it.s_entry = True
     # print('df_indiv1: ',my_instance.df_indiv)
     print('Ready for new data...')
     try:
-        while it.Sentry:
+        while it.s_entry:
 
             loop_time = time.time()
             it.kafka_consumer.subscribe([it.kafka_topics['docker_topic']])
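The loop body re-subscribes to the docker topic on every pass before polling. For context, the usual consume pattern with confluent-kafka looks like the sketch below; the broker address, group id and topic name are placeholders, not the project's configuration:

from confluent_kafka import Consumer

consumer = Consumer({
    'bootstrap.servers': 'localhost:9092',
    'group.id': 'example-consumer-group',
    'auto.offset.reset': 'earliest',
})
consumer.subscribe(['docker_topic'])
msg = consumer.poll(timeout=1.0)  # None when nothing arrived within the timeout
if msg is not None and msg.error() is None:
    payload = msg.value()  # raw bytes; run through the Avro deserializer when one is configured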
@@ -806,7 +938,7 @@ def streaming_eval(
 
         # if tmax >= my_instance.time:
 
-        # it.Sentry = True # change to False to end loop according to mock data
+        # it.s_entry = True # change to False to end loop according to mock data
         # else:
         # loop_nb += 1
         my_instance.time += 1