@@ -295,9 +295,7 @@ def _generate_response_from_gemini_model(
         evaluation_run_config: Evaluation Run Configurations.
         is_baseline_model: Whether the model is a baseline model for PairwiseMetric.
     """
-    max_workers = int(
-        constants.QuotaLimit.GEMINI_1_0_PRO_GENERATE_CONTENT_REQUESTS_PER_MINUTE / 2
-    )
+
     # Ensure thread safety and avoid race conditions.
     df = evaluation_run_config.dataset.copy()

@@ -310,7 +308,7 @@ def _generate_response_from_gemini_model(
             constants.Dataset.COMPLETED_PROMPT_COLUMN
             in evaluation_run_config.dataset.columns
         ):
-            with futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
+            with futures.ThreadPoolExecutor(max_workers=constants.MAX_WORKERS) as executor:
                 for _, row in df.iterrows():
                     tasks.append(
                         executor.submit(
@@ -323,7 +321,7 @@ def _generate_response_from_gemini_model(
             content_column_name = evaluation_run_config.column_map[
                 constants.Dataset.CONTENT_COLUMN
             ]
-            with futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
+            with futures.ThreadPoolExecutor(max_workers=constants.MAX_WORKERS) as executor:
                 for _, row in df.iterrows():
                     tasks.append(
                         executor.submit(
@@ -609,9 +607,10 @@ def _compute_metrics(

     instance_list = []
     futures_by_metric = collections.defaultdict(list)
-    eval_max_workers = constants.QuotaLimit.EVAL_SERVICE_QPS
+
+    rate_limiter = utils.RateLimiter(evaluation_run_config.evaluation_service_qps)
     with tqdm(total=api_request_count) as pbar:
-        with futures.ThreadPoolExecutor(max_workers=eval_max_workers) as executor:
+        with futures.ThreadPoolExecutor(max_workers=constants.MAX_WORKERS) as executor:
             for idx, row in evaluation_run_config.dataset.iterrows():
                 row_dict = _compute_custom_metrics(row.to_dict(), custom_metrics)

@@ -626,6 +625,7 @@ def _compute_metrics(
                         row_dict=row_dict,
                         evaluation_run_config=evaluation_run_config,
                     ),
+                    rate_limiter=rate_limiter,
                     retry_timeout=evaluation_run_config.retry_timeout,
                 )
                 future.add_done_callback(lambda _: pbar.update(1))
@@ -686,6 +686,7 @@ def evaluate(
     response_column_name: str = "response",
     context_column_name: str = "context",
     instruction_column_name: str = "instruction",
+    evaluation_service_qps: Optional[float] = None,
     retry_timeout: float = 600.0,
 ) -> evaluation_base.EvalResult:
     """Runs the evaluation for metrics.
@@ -712,6 +713,7 @@ def evaluate(
             not set, default to `context`.
         instruction_column_name: The column name of the instruction prompt in the
             dataset. If not set, default to `instruction`.
+        evaluation_service_qps: The custom QPS limit for the evaluation service.
         retry_timeout: How long to keep retrying the evaluation requests for the
             whole evaluation dataset, in seconds.
     Returns:
@@ -741,6 +743,9 @@ def evaluate(
             constants.Dataset.INSTRUCTION_COLUMN: instruction_column_name,
         },
         client=utils.create_evaluation_service_client(),
+        evaluation_service_qps=evaluation_service_qps
+        if evaluation_service_qps
+        else constants.QuotaLimit.EVAL_SERVICE_QPS,
         retry_timeout=retry_timeout,
     )
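The diff threads a `utils.RateLimiter(evaluation_run_config.evaluation_service_qps)` through to the per-request submit path, but the limiter's implementation is not part of this excerpt. As a rough illustration only, a minimal sketch of such a throttle, assuming a sleep-based design with a shared lock (not the SDK's actual code), might look like:

```python
import threading
import time


class RateLimiter:
    """Grants at most `rate` permits per second across threads (sketch only)."""

    def __init__(self, rate: float):
        if not rate or rate <= 0:
            raise ValueError("rate must be a positive number of queries per second")
        self._min_interval = 1.0 / rate  # seconds between permitted requests
        self._last_grant = 0.0
        self._lock = threading.Lock()  # serializes grants across worker threads

    def sleep_and_advance(self) -> None:
        """Blocks the caller until the next request slot opens, then claims it."""
        with self._lock:
            wait = self._min_interval - (time.monotonic() - self._last_grant)
            if wait > 0:
                time.sleep(wait)
            self._last_grant = time.monotonic()
```

With each worker calling something like `sleep_and_advance()` before issuing a request, the aggregate request rate stays under `evaluation_service_qps` even though the pool now uses the fixed `constants.MAX_WORKERS` thread count; concurrency and throughput are controlled independently instead of sizing the pool from a quota constant.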
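On the caller side, the new `evaluation_service_qps` argument lets users tune throughput against the evaluation service, falling back to `constants.QuotaLimit.EVAL_SERVICE_QPS` when left unset. A hypothetical invocation, assuming `dataset` and `metrics` parameters that the excerpt references but does not show, with placeholder data and metric names:

```python
import pandas as pd

eval_dataset = pd.DataFrame(
    {
        "instruction": ["Summarize the text."],
        "context": ["The quick brown fox jumps over the lazy dog."],
        "response": ["A fox jumps over a dog."],
    }
)

# Throttle evaluation service calls to 2 requests per second; omitting the
# argument (or passing None) falls back to constants.QuotaLimit.EVAL_SERVICE_QPS.
result = evaluate(
    dataset=eval_dataset,
    metrics=["summarization_quality"],  # placeholder metric name
    evaluation_service_qps=2.0,
    retry_timeout=600.0,
)
```

Note that the fallback in the final hunk tests truthiness (`if evaluation_service_qps`) rather than `is not None`, so passing `0` also falls back to the default quota rather than disabling requests.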