@@ -56,6 +56,145 @@ def session_toy_function(database_connection):
56
56
Model .metadata .drop_all (db )
57
57
58
58
59
def _header_cells(html):
    """Return the text of every bare ``<th>`` cell in *html*, in order."""
    return re.findall(r"<th>(.*?)</th>", html)


def _has_consecutive_headers(html, expected):
    """Tell whether *expected* occurs as a consecutive run of header cells.

    Only the cell texts are compared, so the check does not depend on the
    indentation or line breaks the HTML renderer puts between the cells.
    """
    cells = _header_cells(html)
    width = len(expected)
    return any(
        cells[start:start + width] == expected
        for start in range(len(cells) - width + 1)
    )


def test_get_leaderboard(session_toy_db):
    """Check every leaderboard type before and after the dispatcher runs.

    This test assumes that all the submissions in the database are 'new'
    when it starts. It first inspects the "new" leaderboard, then launches
    the dispatcher and verifies the "failed", "public", "private" and both
    competition leaderboards: number of ``<tr>`` occurrences, link to the
    error log, and column headers.
    """
    event = "iris_test"
    user = "test_user"

    leaderboard_new = get_leaderboard(session_toy_db, "new", event)
    assert leaderboard_new.count("<tr>") == 6
    leaderboard_new = get_leaderboard(session_toy_db, "new", event, user)
    assert leaderboard_new.count("<tr>") == 3

    # run the dispatcher to process the different submissions
    config = read_config(database_config_template())
    event_config = read_config(ramp_config_template())
    dispatcher = Dispatcher(
        config, event_config, n_workers=-1, hunger_policy="exit"
    )
    dispatcher.launch()
    session_toy_db.commit()

    # once processed, no submission is left in the "new" state
    assert get_leaderboard(session_toy_db, "new", event) is None

    # the iris dataset has a single submission which is failing
    leaderboard_failed = get_leaderboard(session_toy_db, "failed", event)
    assert leaderboard_failed.count("<tr>") == 2
    leaderboard_failed = get_leaderboard(session_toy_db, "failed", event, user)
    assert leaderboard_failed.count("<tr>") == 1
    # check that we have a link to the log of the failed submission; the dot
    # of "error.txt" is escaped so that only the literal file name matches
    assert re.search(
        r"<a href=/.*/error\.txt>", leaderboard_failed, flags=re.DOTALL
    )

    # the remaining submission should be successful
    leaderboard_public = get_leaderboard(session_toy_db, "public", event)
    assert leaderboard_public.count("<tr>") == 4
    leaderboard_public = get_leaderboard(session_toy_db, "public", event, user)
    assert leaderboard_public.count("<tr>") == 2

    leaderboard_private = get_leaderboard(session_toy_db, "private", event)
    assert leaderboard_private.count("<tr>") == 4
    leaderboard_private = get_leaderboard(
        session_toy_db, "private", event, user
    )
    assert leaderboard_private.count("<tr>") == 2

    # the competition leaderboard will have the best solution for each user
    competition_public = get_leaderboard(
        session_toy_db, "public competition", event
    )
    assert competition_public.count("<tr>") == 2
    competition_private = get_leaderboard(
        session_toy_db, "private competition", event
    )
    assert competition_private.count("<tr>") == 2

    # check the difference between the public and private leaderboard
    assert leaderboard_private.count("<td>") > leaderboard_public.count("<td>")
    for private_term in ["bag", "mean", "std", "private"]:
        assert private_term not in leaderboard_public
        assert private_term in leaderboard_private

    # check the column name in each leaderboard
    # private leaderboard: bag/mean/std of every score, public step first
    detailed_scores = [
        " ".join((statistic, step, score))
        for step in ("public", "private")
        for score in ("acc", "error", "nll", "f1_70")
        for statistic in ("bag", "mean", "std")
    ]
    expected_private = (
        ["submission ID", "team", "submission"]
        + detailed_scores
        + [
            "train time [s]",
            "validation time [s]",
            "test time [s]",
            "max RAM [MB]",
            "submitted at (UTC)",
        ]
    )
    assert _has_consecutive_headers(leaderboard_private, expected_private)

    expected_public = [
        "team",
        "submission",
        "acc",
        "error",
        "nll",
        "f1_70",
        "train time [s]",
        "validation time [s]",
        "max RAM [MB]",
        "submitted at (UTC)",
    ]
    assert _has_consecutive_headers(leaderboard_public, expected_public)

    expected_failed = [
        "team",
        "submission",
        "id",
        "submitted at (UTC)",
        "error",
    ]
    assert _has_consecutive_headers(leaderboard_failed, expected_failed)

    # check the same for the competition leaderboard
    expected_competition_public = [
        "rank",
        "team",
        "submission",
        "acc",
        "train time [s]",
        "validation time [s]",
        "submitted at (UTC)",
    ]
    assert _has_consecutive_headers(
        competition_public, expected_competition_public
    )

    expected_competition_private = [
        "rank",
        "move",
        "team",
        "submission",
        "acc",
        "train time [s]",
        "validation time [s]",
        "test time [s]",
        "submitted at (UTC)",
    ]
    assert _has_consecutive_headers(
        competition_private, expected_competition_private
    )
196
+
197
+
59
198
def test_update_leaderboard_functions (session_toy_function ):
60
199
event_name = "iris_test"
61
200
user_name = "test_user"
@@ -208,145 +347,6 @@ def test_get_leaderboard_non_bagged_scores(session_toy_db, monkeypatch):
208
347
)
209
348
210
349
211
def _header_cells(html):
    """Return the text of every bare ``<th>`` cell in *html*, in order."""
    return re.findall(r"<th>(.*?)</th>", html)


def _has_consecutive_headers(html, expected):
    """Tell whether *expected* occurs as a consecutive run of header cells.

    Only the cell texts are compared, so the check does not depend on the
    indentation or line breaks the HTML renderer puts between the cells.
    """
    cells = _header_cells(html)
    width = len(expected)
    return any(
        cells[start:start + width] == expected
        for start in range(len(cells) - width + 1)
    )


def test_get_leaderboard(session_toy_db):
    """Check every leaderboard type before and after the dispatcher runs.

    This test assumes that all the submissions in the database are 'new'
    when it starts. It first inspects the "new" leaderboard, then launches
    the dispatcher and verifies the "failed", "public", "private" and both
    competition leaderboards: number of ``<tr>`` occurrences, link to the
    error log, and column headers.
    """
    event = "iris_test"
    user = "test_user"

    leaderboard_new = get_leaderboard(session_toy_db, "new", event)
    assert leaderboard_new.count("<tr>") == 6
    leaderboard_new = get_leaderboard(session_toy_db, "new", event, user)
    assert leaderboard_new.count("<tr>") == 3

    # run the dispatcher to process the different submissions
    config = read_config(database_config_template())
    event_config = read_config(ramp_config_template())
    dispatcher = Dispatcher(
        config, event_config, n_workers=-1, hunger_policy="exit"
    )
    dispatcher.launch()
    session_toy_db.commit()

    # once processed, no submission is left in the "new" state
    assert get_leaderboard(session_toy_db, "new", event) is None

    # the iris dataset has a single submission which is failing
    leaderboard_failed = get_leaderboard(session_toy_db, "failed", event)
    assert leaderboard_failed.count("<tr>") == 2
    leaderboard_failed = get_leaderboard(session_toy_db, "failed", event, user)
    assert leaderboard_failed.count("<tr>") == 1
    # check that we have a link to the log of the failed submission; the dot
    # of "error.txt" is escaped so that only the literal file name matches
    assert re.search(
        r"<a href=/.*/error\.txt>", leaderboard_failed, flags=re.DOTALL
    )

    # the remaining submission should be successful
    leaderboard_public = get_leaderboard(session_toy_db, "public", event)
    assert leaderboard_public.count("<tr>") == 4
    leaderboard_public = get_leaderboard(session_toy_db, "public", event, user)
    assert leaderboard_public.count("<tr>") == 2

    leaderboard_private = get_leaderboard(session_toy_db, "private", event)
    assert leaderboard_private.count("<tr>") == 4
    leaderboard_private = get_leaderboard(
        session_toy_db, "private", event, user
    )
    assert leaderboard_private.count("<tr>") == 2

    # the competition leaderboard will have the best solution for each user
    competition_public = get_leaderboard(
        session_toy_db, "public competition", event
    )
    assert competition_public.count("<tr>") == 2
    competition_private = get_leaderboard(
        session_toy_db, "private competition", event
    )
    assert competition_private.count("<tr>") == 2

    # check the difference between the public and private leaderboard
    assert leaderboard_private.count("<td>") > leaderboard_public.count("<td>")
    for private_term in ["bag", "mean", "std", "private"]:
        assert private_term not in leaderboard_public
        assert private_term in leaderboard_private

    # check the column name in each leaderboard
    # private leaderboard: bag/mean/std of every score, public step first
    detailed_scores = [
        " ".join((statistic, step, score))
        for step in ("public", "private")
        for score in ("acc", "error", "nll", "f1_70")
        for statistic in ("bag", "mean", "std")
    ]
    expected_private = (
        ["submission ID", "team", "submission"]
        + detailed_scores
        + [
            "train time [s]",
            "validation time [s]",
            "test time [s]",
            "max RAM [MB]",
            "submitted at (UTC)",
        ]
    )
    assert _has_consecutive_headers(leaderboard_private, expected_private)

    expected_public = [
        "team",
        "submission",
        "acc",
        "error",
        "nll",
        "f1_70",
        "train time [s]",
        "validation time [s]",
        "max RAM [MB]",
        "submitted at (UTC)",
    ]
    assert _has_consecutive_headers(leaderboard_public, expected_public)

    expected_failed = [
        "team",
        "submission",
        "id",
        "submitted at (UTC)",
        "error",
    ]
    assert _has_consecutive_headers(leaderboard_failed, expected_failed)

    # check the same for the competition leaderboard
    expected_competition_public = [
        "rank",
        "team",
        "submission",
        "acc",
        "train time [s]",
        "validation time [s]",
        "submitted at (UTC)",
    ]
    assert _has_consecutive_headers(
        competition_public, expected_competition_public
    )

    expected_competition_private = [
        "rank",
        "move",
        "team",
        "submission",
        "acc",
        "train time [s]",
        "validation time [s]",
        "test time [s]",
        "submitted at (UTC)",
    ]
    assert _has_consecutive_headers(
        competition_private, expected_competition_private
    )
348
-
349
-
350
350
@pytest .mark .parametrize (
351
351
"event_name, expected_size" ,
352
352
[("iris_test" , 4 ), ("iris_aws_test" , 0 ), ("boston_housing_test" , 0 )],
0 commit comments