@@ -255,6 +255,7 @@ def _evaluate_with_experiment(
         prompt_template: Optional[str] = None,
         experiment_run_name: Optional[str] = None,
         response_column_name: Optional[str] = None,
+        retry_timeout: float = 600.0,
     ) -> EvalResult:
         """Runs an evaluation for the EvalTask with an experiment.
 
@@ -270,6 +271,8 @@ def _evaluate_with_experiment(
             unique experiment run name is used.
           response_column_name: The column name of model response in the dataset. If
             provided, this will override the `response_column_name` of the `EvalTask`.
+          retry_timeout: How long to keep retrying the evaluation requests for
+            the whole evaluation dataset, in seconds.
 
         Returns:
             The evaluation result.
@@ -285,6 +288,7 @@ def _evaluate_with_experiment(
             content_column_name=self.content_column_name,
             reference_column_name=self.reference_column_name,
             response_column_name=response_column_name,
+            retry_timeout=retry_timeout,
         )
 
         eval_result.summary_metrics = {
@@ -342,7 +346,11 @@ def evaluate(
                 experiment=self.experiment, backing_tensorboard=False
             )
             eval_result = self._evaluate_with_experiment(
-                model, prompt_template, experiment_run_name, response_column_name
+                model,
+                prompt_template,
+                experiment_run_name,
+                response_column_name,
+                retry_timeout,
             )
             metadata._experiment_tracker.set_experiment(
                 experiment=global_experiment_name, backing_tensorboard=False
@@ -352,12 +360,20 @@ def evaluate(
                 experiment=self.experiment, backing_tensorboard=False
             )
             eval_result = self._evaluate_with_experiment(
-                model, prompt_template, experiment_run_name, response_column_name
+                model,
+                prompt_template,
+                experiment_run_name,
+                response_column_name,
+                retry_timeout,
             )
             metadata._experiment_tracker.reset()
         elif not self.experiment and global_experiment_name:
             eval_result = self._evaluate_with_experiment(
-                model, prompt_template, experiment_run_name, response_column_name
+                model,
+                prompt_template,
+                experiment_run_name,
+                response_column_name,
+                retry_timeout,
             )
         else:
             eval_result = _evaluation.evaluate(
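For context, a minimal usage sketch of the new parameter against the `vertexai.preview.evaluation` surface this diff touches. This is an illustration under assumptions, not part of the change: the dataset rows, metric name, experiment name, run name, and model are hypothetical placeholders, and it presumes the public `evaluate` method gained the same `retry_timeout` parameter that it forwards in the hunks above.

import pandas as pd
from vertexai.preview.evaluation import EvalTask
from vertexai.preview.generative_models import GenerativeModel

# Hypothetical dataset; "content" and "reference" match the EvalTask
# column-name defaults referenced in the diff.
eval_dataset = pd.DataFrame(
    {
        "content": ["Summarize: ...", "Translate to French: ..."],
        "reference": ["...", "..."],
    }
)

eval_task = EvalTask(
    dataset=eval_dataset,
    metrics=["exact_match"],          # illustrative metric
    experiment="my-eval-experiment",  # hypothetical experiment name
)

# retry_timeout caps how long evaluation requests for the whole dataset
# keep being retried, in seconds; per this change it defaults to 600.0.
result = eval_task.evaluate(
    model=GenerativeModel("gemini-pro"),
    experiment_run_name="eval-run-longer-retries",  # hypothetical run name
    retry_timeout=1200.0,
)
print(result.summary_metrics)

Raising retry_timeout above the 600-second default trades faster failure for resilience on large datasets, since the budget covers retries across the whole evaluation rather than a single request.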
0 commit comments