Commit 9ee9289

jsondai authored and copybara-github committed

feat: Update Rapid Evaluation Service QPS. Add a customizable evaluation service QPS parameter.

PiperOrigin-RevId: 656085181
1 parent 0621306 commit 9ee9289

6 files changed: +35 -12 lines changed

vertexai/preview/evaluation/_base.py (+2)

@@ -40,13 +40,15 @@ class EvaluationRunConfig:
       metrics: The list of metric names, or metric bundle names, or Metric instances to evaluate.
       column_map: The dictionary of column name overrides in the dataset.
       client: The evaluation service client.
+      evaluation_service_qps: The custom QPS limit for the evaluation service.
       retry_timeout: How long to keep retrying the evaluation requests, in seconds.
     """

     dataset: "pd.DataFrame"
     metrics: List[Union[str, metrics_base._Metric]]
     column_map: Dict[str, str]
     client: gapic_evaluation_services.EvaluationServiceClient
+    evaluation_service_qps: float
     retry_timeout: float

     def validate_dataset_column(self, column_name: str) -> None:

vertexai/preview/evaluation/_eval_tasks.py (+9)

@@ -255,6 +255,7 @@ def _evaluate_with_experiment(
         prompt_template: Optional[str] = None,
         experiment_run_name: Optional[str] = None,
         response_column_name: Optional[str] = None,
+        evaluation_service_qps: Optional[float] = None,
         retry_timeout: float = 600.0,
     ) -> EvalResult:
         """Runs an evaluation for the EvalTask with an experiment.
@@ -271,6 +272,7 @@ def _evaluate_with_experiment(
             unique experiment run name is used.
           response_column_name: The column name of model response in the dataset. If
             provided, this will override the `response_column_name` of the `EvalTask`.
+          evaluation_service_qps: The custom QPS limit for the evaluation service.
           retry_timeout: How long to keep retrying the evaluation requests for
             the whole evaluation dataset, in seconds.
@@ -288,6 +290,7 @@ def _evaluate_with_experiment(
             content_column_name=self.content_column_name,
             reference_column_name=self.reference_column_name,
             response_column_name=response_column_name,
+            evaluation_service_qps=evaluation_service_qps,
             retry_timeout=retry_timeout,
         )
@@ -308,6 +311,7 @@ def evaluate(
         prompt_template: Optional[str] = None,
         experiment_run_name: Optional[str] = None,
         response_column_name: Optional[str] = None,
+        evaluation_service_qps: Optional[float] = None,
         retry_timeout: float = 600.0,
     ) -> EvalResult:
         """Runs an evaluation for the EvalTask.
@@ -324,6 +328,7 @@ def evaluate(
             unique experiment run name is used.
           response_column_name: The column name of model response in the dataset. If
             provided, this will override the `response_column_name` of the `EvalTask`.
+          evaluation_service_qps: The custom QPS limit for the evaluation service.
           retry_timeout: How long to keep retrying the evaluation requests for
             the whole evaluation dataset, in seconds.
@@ -350,6 +355,7 @@ def evaluate(
                 prompt_template,
                 experiment_run_name,
                 response_column_name,
+                evaluation_service_qps,
                 retry_timeout,
             )
             metadata._experiment_tracker.set_experiment(
@@ -364,6 +370,7 @@ def evaluate(
                 prompt_template,
                 experiment_run_name,
                 response_column_name,
+                evaluation_service_qps,
                 retry_timeout,
             )
             metadata._experiment_tracker.reset()
@@ -373,6 +380,7 @@ def evaluate(
                 prompt_template,
                 experiment_run_name,
                 response_column_name,
+                evaluation_service_qps,
                 retry_timeout,
             )
         else:
@@ -384,6 +392,7 @@ def evaluate(
             content_column_name=self.content_column_name,
             reference_column_name=self.reference_column_name,
             response_column_name=response_column_name,
+            evaluation_service_qps=evaluation_service_qps,
             retry_timeout=retry_timeout,
         )
         return eval_result
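
For context, a minimal usage sketch of the new parameter from the caller's side. The dataset and metric here are placeholders, and it assumes `vertexai.init(...)` has already been called and that `EvalTask` is importable from `vertexai.preview.evaluation` as in this preview SDK:

    import pandas as pd
    from vertexai.preview.evaluation import EvalTask

    # Placeholder single-row dataset; "fluency" is one of the built-in metrics.
    eval_task = EvalTask(
        dataset=pd.DataFrame({"response": ["Paris is the capital of France."]}),
        metrics=["fluency"],
    )

    # Raise the request rate above the 0.25 QPS default, e.g. after a quota increase.
    result = eval_task.evaluate(evaluation_service_qps=1.0)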

vertexai/preview/evaluation/_evaluation.py (+12 -7)

@@ -295,9 +295,7 @@ def _generate_response_from_gemini_model(
       evaluation_run_config: Evaluation Run Configurations.
       is_baseline_model: Whether the model is a baseline model for PairwiseMetric.
     """
-    max_workers = int(
-        constants.QuotaLimit.GEMINI_1_0_PRO_GENERATE_CONTENT_REQUESTS_PER_MINUTE / 2
-    )
+
     # Ensure thread safety and avoid race conditions.
     df = evaluation_run_config.dataset.copy()
@@ -310,7 +308,7 @@ def _generate_response_from_gemini_model(
         constants.Dataset.COMPLETED_PROMPT_COLUMN
         in evaluation_run_config.dataset.columns
     ):
-        with futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
+        with futures.ThreadPoolExecutor(max_workers=constants.MAX_WORKERS) as executor:
             for _, row in df.iterrows():
                 tasks.append(
                     executor.submit(
@@ -323,7 +321,7 @@ def _generate_response_from_gemini_model(
         content_column_name = evaluation_run_config.column_map[
             constants.Dataset.CONTENT_COLUMN
         ]
-        with futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
+        with futures.ThreadPoolExecutor(max_workers=constants.MAX_WORKERS) as executor:
             for _, row in df.iterrows():
                 tasks.append(
                     executor.submit(
@@ -609,9 +607,10 @@ def _compute_metrics(

     instance_list = []
     futures_by_metric = collections.defaultdict(list)
-    eval_max_workers = constants.QuotaLimit.EVAL_SERVICE_QPS
+
+    rate_limiter = utils.RateLimiter(evaluation_run_config.evaluation_service_qps)
     with tqdm(total=api_request_count) as pbar:
-        with futures.ThreadPoolExecutor(max_workers=eval_max_workers) as executor:
+        with futures.ThreadPoolExecutor(max_workers=constants.MAX_WORKERS) as executor:
             for idx, row in evaluation_run_config.dataset.iterrows():
                 row_dict = _compute_custom_metrics(row.to_dict(), custom_metrics)
@@ -626,6 +625,7 @@ def _compute_metrics(
                         row_dict=row_dict,
                         evaluation_run_config=evaluation_run_config,
                     ),
+                    rate_limiter=rate_limiter,
                     retry_timeout=evaluation_run_config.retry_timeout,
                 )
                 future.add_done_callback(lambda _: pbar.update(1))
@@ -686,6 +686,7 @@ def evaluate(
     response_column_name: str = "response",
     context_column_name: str = "context",
     instruction_column_name: str = "instruction",
+    evaluation_service_qps: Optional[float] = None,
     retry_timeout: float = 600.0,
 ) -> evaluation_base.EvalResult:
     """Runs the evaluation for metrics.
@@ -712,6 +713,7 @@ def evaluate(
         not set, default to `context`.
       instruction_column_name: The column name of the instruction prompt in the
         dataset. If not set, default to `instruction`.
+      evaluation_service_qps: The custom QPS limit for the evaluation service.
       retry_timeout: How long to keep retrying the evaluation requests for the
         whole evaluation dataset, in seconds.
     Returns:
@@ -741,6 +743,9 @@ def evaluate(
             constants.Dataset.INSTRUCTION_COLUMN: instruction_column_name,
         },
         client=utils.create_evaluation_service_client(),
+        evaluation_service_qps=evaluation_service_qps
+        if evaluation_service_qps
+        else constants.QuotaLimit.EVAL_SERVICE_QPS,
         retry_timeout=retry_timeout,
     )
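
The pattern introduced above decouples concurrency from throughput: a fixed pool of MAX_WORKERS threads shares one RateLimiter, so the request rate is capped by QPS regardless of pool size or request latency. A minimal self-contained sketch of that pattern; the lock and the exact sleep logic are assumptions, not the SDK's actual utils.RateLimiter internals:

    import threading
    import time
    from concurrent import futures

    class RateLimiter:
        """Paces events to at most `rate` per second (assumed semantics)."""

        def __init__(self, rate: float):
            if not rate or rate <= 0:
                raise ValueError("Rate must be a positive number")
            self.seconds_per_event = 1.0 / rate
            self.last = time.time() - self.seconds_per_event
            self._lock = threading.Lock()  # assumption: shared safely across workers

        def sleep_and_advance(self):
            # Block until a full interval has passed since the last permitted
            # event, then claim the next slot.
            with self._lock:
                wait = self.last + self.seconds_per_event - time.time()
                if wait > 0:
                    time.sleep(wait)
                self.last = time.time()

    def send_request(i: int, limiter: RateLimiter) -> None:
        limiter.sleep_and_advance()
        print(f"request {i} sent at {time.time():.2f}")

    limiter = RateLimiter(2.0)  # 2 QPS, shared by all workers
    with futures.ThreadPoolExecutor(max_workers=4) as executor:
        for i in range(6):
            executor.submit(send_request, i, limiter)

Running the sketch shows requests leaving roughly 0.5 seconds apart even though four workers are ready, which is the behavior this commit relies on when it fixes the pool at constants.MAX_WORKERS.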

vertexai/preview/evaluation/constants.py (+8 -1)

@@ -17,6 +17,10 @@
 """Constants for evaluation."""
 import dataclasses

+# The number of concurrent workers to use for making model inference and
+# evaluation requests.
+MAX_WORKERS = 100
+

 @dataclasses.dataclass(frozen=True)
 class Metric:
@@ -193,4 +197,7 @@ class QuotaLimit:
     # Default queries per minute (QPM) quota for `gemini-1.0-pro` base model.
     GEMINI_1_0_PRO_GENERATE_CONTENT_REQUESTS_PER_MINUTE = 300

-    EVAL_SERVICE_QPS = 10
+    # Evaluation Service QPS limit can be computed by
+    # (GEMINI_1_5_PRO_GENERATE_CONTENT_REQUESTS_QPM / 60 / Number of Samples)
+    # 0.25 = 60 / 60 / 4
+    EVAL_SERVICE_QPS = 0.25
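
A quick sanity check of the quota arithmetic in that comment, with the values the comment gives:

    # QPS = QPM quota / 60 seconds / samples sent per evaluation request.
    qpm_quota = 60
    samples_per_request = 4
    eval_service_qps = qpm_quota / 60 / samples_per_request
    assert eval_service_qps == 0.25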

vertexai/preview/evaluation/metrics/_instance_evaluation.py (+3 -3)

@@ -620,24 +620,24 @@ def handle_response(
     return result


-# TODO(b/346659152): Add interface to customize rate limit.
-@utils.rate_limit(constants.QuotaLimit.EVAL_SERVICE_QPS)
 def evaluate_instances(
     client: gapic_evaluation_services.EvaluationServiceClient,
     request: gapic_eval_service_types.EvaluateInstancesRequest,
+    rate_limiter: utils.RateLimiter,
     retry_timeout: float,
 ) -> gapic_eval_service_types.EvaluateInstancesResponse:
     """Evaluates an instance.

     Args:
       client: The client to use for evaluation.
       request: An EvaluateInstancesRequest.
+      rate_limiter: The rate limiter to use for evaluation service requests.
       retry_timeout: How long to keep retrying the evaluation requests, in seconds.

     Returns:
       A response from the evaluation service.
     """
-
+    rate_limiter.sleep_and_advance()
     return client.evaluate_instances(
         request=request,
         retry=api_core.retry.Retry(
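
The design shift in this file is from a module-level decorator, which froze the limit at import time, to a limiter passed in per call, which lets each evaluation run carry its own QPS. A rough reconstruction of the removed decorator pattern for contrast; this is an assumption based on its usage above, not the SDK's actual utils.rate_limit implementation:

    import functools
    import time

    def rate_limit(qps: float):
        """Hypothetical sketch: the pace is fixed once, at decoration time."""
        seconds_per_event = 1.0 / qps
        last = [time.time() - seconds_per_event]  # one shared slot per decorated function

        def decorator(fn):
            @functools.wraps(fn)
            def wrapper(*args, **kwargs):
                # Every call shares one fixed pace; callers cannot override
                # it per run, which is exactly what this commit changes.
                wait = last[0] + seconds_per_event - time.time()
                if wait > 0:
                    time.sleep(wait)
                last[0] = time.time()
                return fn(*args, **kwargs)
            return wrapper
        return decorator

    @rate_limit(0.25)  # pinned at import time for all callers
    def call_eval_service():
        print("request at", time.time())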

vertexai/preview/evaluation/utils.py (+1 -1)

@@ -73,7 +73,7 @@ def __init__(self, rate: Optional[float] = None):
         Raises:
             ValueError: If the rate is not positive.
         """
-        if rate <= 0:
+        if not rate or rate <= 0:
             raise ValueError("Rate must be a positive number")
         self.seconds_per_event = 1.0 / rate
         self.last = time.time() - self.seconds_per_event
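
Worth noting about this guard: `rate` defaults to None, and `None <= 0` raises a TypeError in Python 3, so the old code could never reach its own ValueError for the default. The new `not rate` check short-circuits first. A tiny standalone demonstration of the fixed guard's behavior:

    def _check(rate):
        # Mirrors the fixed guard: `not rate` catches None (and 0) before
        # the `None <= 0` comparison that used to raise TypeError.
        if not rate or rate <= 0:
            raise ValueError("Rate must be a positive number")

    for bad in (None, 0, -1):
        try:
            _check(bad)
        except ValueError as e:
            print(bad, "->", e)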
