Skip to content

Commit 7b57a0f

Browse files
committed
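Fix test execution order
test_get_leaderboard assumes that every submission in the shared session_toy_db database is still 'new', so it has to run before the other tests that use that database. This commit moves it, unchanged, from after test_get_leaderboard_non_bagged_scores to before test_update_leaderboard_functions.
Likely something changed in pytest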
1 parent 95e0737 commit 7b57a0f

File tree

1 file changed

+139
-139
lines changed

1 file changed

+139
-139
lines changed

ramp-database/ramp_database/tools/tests/test_leaderboard.py

+139-139
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,145 @@ def session_toy_function(database_connection):
5656
Model.metadata.drop_all(db)
5757

5858

59+
def test_get_leaderboard(session_toy_db):
60+
"""this test assumes that all the submissions in the database are 'new'"""
61+
leaderboard_new = get_leaderboard(session_toy_db, "new", "iris_test")
62+
assert leaderboard_new.count("<tr>") == 6
63+
leaderboard_new = get_leaderboard(session_toy_db, "new", "iris_test", "test_user")
64+
assert leaderboard_new.count("<tr>") == 3
65+
66+
# run the dispatcher to process the different submissions
67+
config = read_config(database_config_template())
68+
event_config = read_config(ramp_config_template())
69+
dispatcher = Dispatcher(config, event_config, n_workers=-1, hunger_policy="exit")
70+
dispatcher.launch()
71+
session_toy_db.commit()
72+
73+
assert get_leaderboard(session_toy_db, "new", "iris_test") is None
74+
# the iris dataset has a single submission which is failing
75+
leaderboard_failed = get_leaderboard(session_toy_db, "failed", "iris_test")
76+
assert leaderboard_failed.count("<tr>") == 2
77+
leaderboard_failed = get_leaderboard(
78+
session_toy_db, "failed", "iris_test", "test_user"
79+
)
80+
assert leaderboard_failed.count("<tr>") == 1
81+
# check that we have a link to the log of the failed submission
82+
assert re.match(r".*<a href=/.*/error.txt>.*", leaderboard_failed, flags=re.DOTALL)
83+
84+
# the remaining submission should be successful
85+
leaderboard_public = get_leaderboard(session_toy_db, "public", "iris_test")
86+
assert leaderboard_public.count("<tr>") == 4
87+
leaderboard_public = get_leaderboard(
88+
session_toy_db, "public", "iris_test", "test_user"
89+
)
90+
assert leaderboard_public.count("<tr>") == 2
91+
92+
leaderboard_private = get_leaderboard(session_toy_db, "private", "iris_test")
93+
assert leaderboard_private.count("<tr>") == 4
94+
leaderboard_private = get_leaderboard(
95+
session_toy_db, "private", "iris_test", "test_user"
96+
)
97+
assert leaderboard_private.count("<tr>") == 2
98+
99+
# the competition leaderboard will have the best solution for each user
100+
competition_public = get_leaderboard(
101+
session_toy_db, "public competition", "iris_test"
102+
)
103+
assert competition_public.count("<tr>") == 2
104+
competition_private = get_leaderboard(
105+
session_toy_db, "private competition", "iris_test"
106+
)
107+
assert competition_private.count("<tr>") == 2
108+
109+
# check the difference between the public and private leaderboard
110+
assert leaderboard_private.count("<td>") > leaderboard_public.count("<td>")
111+
for private_term in ["bag", "mean", "std", "private"]:
112+
assert private_term not in leaderboard_public
113+
assert private_term in leaderboard_private
114+
115+
# check the column name in each leaderboard
116+
assert (
117+
"""<th>submission ID</th>
118+
<th>team</th>
119+
<th>submission</th>
120+
<th>bag public acc</th>
121+
<th>mean public acc</th>
122+
<th>std public acc</th>
123+
<th>bag public error</th>
124+
<th>mean public error</th>
125+
<th>std public error</th>
126+
<th>bag public nll</th>
127+
<th>mean public nll</th>
128+
<th>std public nll</th>
129+
<th>bag public f1_70</th>
130+
<th>mean public f1_70</th>
131+
<th>std public f1_70</th>
132+
<th>bag private acc</th>
133+
<th>mean private acc</th>
134+
<th>std private acc</th>
135+
<th>bag private error</th>
136+
<th>mean private error</th>
137+
<th>std private error</th>
138+
<th>bag private nll</th>
139+
<th>mean private nll</th>
140+
<th>std private nll</th>
141+
<th>bag private f1_70</th>
142+
<th>mean private f1_70</th>
143+
<th>std private f1_70</th>
144+
<th>train time [s]</th>
145+
<th>validation time [s]</th>
146+
<th>test time [s]</th>
147+
<th>max RAM [MB]</th>
148+
<th>submitted at (UTC)</th>"""
149+
in leaderboard_private
150+
)
151+
assert (
152+
"""<th>team</th>
153+
<th>submission</th>
154+
<th>acc</th>
155+
<th>error</th>
156+
<th>nll</th>
157+
<th>f1_70</th>
158+
<th>train time [s]</th>
159+
<th>validation time [s]</th>
160+
<th>max RAM [MB]</th>
161+
<th>submitted at (UTC)</th>"""
162+
in leaderboard_public
163+
)
164+
assert (
165+
"""<th>team</th>
166+
<th>submission</th>
167+
<th>id</th>
168+
<th>submitted at (UTC)</th>
169+
<th>error</th>"""
170+
in leaderboard_failed
171+
)
172+
173+
# check the same for the competition leaderboard
174+
assert (
175+
"""<th>rank</th>
176+
<th>team</th>
177+
<th>submission</th>
178+
<th>acc</th>
179+
<th>train time [s]</th>
180+
<th>validation time [s]</th>
181+
<th>submitted at (UTC)</th>"""
182+
in competition_public
183+
)
184+
assert (
185+
"""<th>rank</th>
186+
<th>move</th>
187+
<th>team</th>
188+
<th>submission</th>
189+
<th>acc</th>
190+
<th>train time [s]</th>
191+
<th>validation time [s]</th>
192+
<th>test time [s]</th>
193+
<th>submitted at (UTC)</th>"""
194+
in competition_private
195+
)
196+
197+
59198
def test_update_leaderboard_functions(session_toy_function):
60199
event_name = "iris_test"
61200
user_name = "test_user"
@@ -208,145 +347,6 @@ def test_get_leaderboard_non_bagged_scores(session_toy_db, monkeypatch):
208347
)
209348

210349

211-
def test_get_leaderboard(session_toy_db):
212-
"""this test assumes that all the submissions in the database are 'new'"""
213-
leaderboard_new = get_leaderboard(session_toy_db, "new", "iris_test")
214-
assert leaderboard_new.count("<tr>") == 6
215-
leaderboard_new = get_leaderboard(session_toy_db, "new", "iris_test", "test_user")
216-
assert leaderboard_new.count("<tr>") == 3
217-
218-
# run the dispatcher to process the different submissions
219-
config = read_config(database_config_template())
220-
event_config = read_config(ramp_config_template())
221-
dispatcher = Dispatcher(config, event_config, n_workers=-1, hunger_policy="exit")
222-
dispatcher.launch()
223-
session_toy_db.commit()
224-
225-
assert get_leaderboard(session_toy_db, "new", "iris_test") is None
226-
# the iris dataset has a single submission which is failing
227-
leaderboard_failed = get_leaderboard(session_toy_db, "failed", "iris_test")
228-
assert leaderboard_failed.count("<tr>") == 2
229-
leaderboard_failed = get_leaderboard(
230-
session_toy_db, "failed", "iris_test", "test_user"
231-
)
232-
assert leaderboard_failed.count("<tr>") == 1
233-
# check that we have a link to the log of the failed submission
234-
assert re.match(r".*<a href=/.*/error.txt>.*", leaderboard_failed, flags=re.DOTALL)
235-
236-
# the remaining submission should be successful
237-
leaderboard_public = get_leaderboard(session_toy_db, "public", "iris_test")
238-
assert leaderboard_public.count("<tr>") == 4
239-
leaderboard_public = get_leaderboard(
240-
session_toy_db, "public", "iris_test", "test_user"
241-
)
242-
assert leaderboard_public.count("<tr>") == 2
243-
244-
leaderboard_private = get_leaderboard(session_toy_db, "private", "iris_test")
245-
assert leaderboard_private.count("<tr>") == 4
246-
leaderboard_private = get_leaderboard(
247-
session_toy_db, "private", "iris_test", "test_user"
248-
)
249-
assert leaderboard_private.count("<tr>") == 2
250-
251-
# the competition leaderboard will have the best solution for each user
252-
competition_public = get_leaderboard(
253-
session_toy_db, "public competition", "iris_test"
254-
)
255-
assert competition_public.count("<tr>") == 2
256-
competition_private = get_leaderboard(
257-
session_toy_db, "private competition", "iris_test"
258-
)
259-
assert competition_private.count("<tr>") == 2
260-
261-
# check the difference between the public and private leaderboard
262-
assert leaderboard_private.count("<td>") > leaderboard_public.count("<td>")
263-
for private_term in ["bag", "mean", "std", "private"]:
264-
assert private_term not in leaderboard_public
265-
assert private_term in leaderboard_private
266-
267-
# check the column name in each leaderboard
268-
assert (
269-
"""<th>submission ID</th>
270-
<th>team</th>
271-
<th>submission</th>
272-
<th>bag public acc</th>
273-
<th>mean public acc</th>
274-
<th>std public acc</th>
275-
<th>bag public error</th>
276-
<th>mean public error</th>
277-
<th>std public error</th>
278-
<th>bag public nll</th>
279-
<th>mean public nll</th>
280-
<th>std public nll</th>
281-
<th>bag public f1_70</th>
282-
<th>mean public f1_70</th>
283-
<th>std public f1_70</th>
284-
<th>bag private acc</th>
285-
<th>mean private acc</th>
286-
<th>std private acc</th>
287-
<th>bag private error</th>
288-
<th>mean private error</th>
289-
<th>std private error</th>
290-
<th>bag private nll</th>
291-
<th>mean private nll</th>
292-
<th>std private nll</th>
293-
<th>bag private f1_70</th>
294-
<th>mean private f1_70</th>
295-
<th>std private f1_70</th>
296-
<th>train time [s]</th>
297-
<th>validation time [s]</th>
298-
<th>test time [s]</th>
299-
<th>max RAM [MB]</th>
300-
<th>submitted at (UTC)</th>"""
301-
in leaderboard_private
302-
)
303-
assert (
304-
"""<th>team</th>
305-
<th>submission</th>
306-
<th>acc</th>
307-
<th>error</th>
308-
<th>nll</th>
309-
<th>f1_70</th>
310-
<th>train time [s]</th>
311-
<th>validation time [s]</th>
312-
<th>max RAM [MB]</th>
313-
<th>submitted at (UTC)</th>"""
314-
in leaderboard_public
315-
)
316-
assert (
317-
"""<th>team</th>
318-
<th>submission</th>
319-
<th>id</th>
320-
<th>submitted at (UTC)</th>
321-
<th>error</th>"""
322-
in leaderboard_failed
323-
)
324-
325-
# check the same for the competition leaderboard
326-
assert (
327-
"""<th>rank</th>
328-
<th>team</th>
329-
<th>submission</th>
330-
<th>acc</th>
331-
<th>train time [s]</th>
332-
<th>validation time [s]</th>
333-
<th>submitted at (UTC)</th>"""
334-
in competition_public
335-
)
336-
assert (
337-
"""<th>rank</th>
338-
<th>move</th>
339-
<th>team</th>
340-
<th>submission</th>
341-
<th>acc</th>
342-
<th>train time [s]</th>
343-
<th>validation time [s]</th>
344-
<th>test time [s]</th>
345-
<th>submitted at (UTC)</th>"""
346-
in competition_private
347-
)
348-
349-
350350
@pytest.mark.parametrize(
351351
"event_name, expected_size",
352352
[("iris_test", 4), ("iris_aws_test", 0), ("boston_housing_test", 0)],

0 commit comments

Comments
 (0)