
Commit d6ef500

jsondai authored and copybara-github committed
fix: add validation for evaluation dataset fields, update logging info for eval api request count
PiperOrigin-RevId: 633729236
1 parent 7ff8071 commit d6ef500

3 files changed: +33 −4 lines changed


tests/unit/vertexai/test_evaluation.py (+13)
@@ -571,6 +571,19 @@ def test_evaluate_pairwise_metrics_with_multiple_baseline_models(self):
         ):
             test_eval_task.evaluate(model=mock_candidate_model)
 
+    def test_evaluate_invalid_model_and_dataset_input(self):
+        test_eval_task = evaluation.EvalTask(
+            dataset=_TEST_EVAL_DATASET,
+            metrics=_TEST_METRICS,
+        )
+        with pytest.raises(
+            ValueError,
+            match=("The `model` parameter is specified, but the evaluation `dataset`"),
+        ):
+            test_eval_task.evaluate(
+                model=generative_models.GenerativeModel(model_name="invalid_model_name")
+            )
+
 
 @pytest.mark.usefixtures("google_auth_mock")
 class TestEvaluationUtils:
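
For context, a minimal sketch of how the new guard surfaces to SDK users. This assumes `vertexai.init()` has already been called; the dataset values, the `exact_match` metric, and the `gemini-1.0-pro` model name are placeholders, not values taken from this diff.

import pandas as pd
from vertexai.preview import evaluation
from vertexai.generative_models import GenerativeModel

# BYOP dataset: model responses are already present in the "response" column.
byop_dataset = pd.DataFrame({
    "prompt": ["What is 2 + 2?"],
    "reference": ["4"],
    "response": ["4"],
})
eval_task = evaluation.EvalTask(dataset=byop_dataset, metrics=["exact_match"])

# Supported: evaluate the pre-computed responses, no model argument.
eval_task.evaluate()

# After this commit: also passing a model raises ValueError instead of
# silently choosing between the model and the existing response column.
try:
    eval_task.evaluate(model=GenerativeModel("gemini-1.0-pro"))
except ValueError as err:
    print(err)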

vertexai/preview/evaluation/_eval_tasks.py (+1 −1)
@@ -79,7 +79,7 @@ class EvalTask:
     documentation page [Evaluation methods and metrics](https://cloud.google.com/vertex-ai/generative-ai/docs/models/determine-eval).
 
     Usage:
-    1. To perform bring your own prediction evaluation, provide the model
+    1. To perform bring-your-own-prediction(BYOP) evaluation, provide the model
     responses in the response column in the dataset. The response column name
     is "response" by default, or specify `response_column_name` parameter to
     customize.
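
A sketch of the BYOP path this docstring describes, with a renamed response column. The dataset values and the `coherence` metric are placeholders, and it assumes `response_column_name` is passed to `evaluate()` as the docstring suggests.

import pandas as pd
from vertexai.preview import evaluation

dataset = pd.DataFrame({
    "prompt": ["Summarize the article ..."],
    "model_output": ["A short summary ..."],  # responses generated outside the SDK
})

eval_task = evaluation.EvalTask(dataset=dataset, metrics=["coherence"])
# Evaluate the existing responses; no `model` argument, so the new
# validation in _evaluation.py is not triggered.
result = eval_task.evaluate(response_column_name="model_output")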

vertexai/preview/evaluation/_evaluation.py (+19 −3)
@@ -534,8 +534,7 @@ async def _compute_metrics(
             metric_name = metric
         tasks_by_metric[metric_name].append(task)
 
-    api_request_count = (len(api_metrics) + len(custom_metrics)) * len(
-        evaluation_run_config.dataset)
+    api_request_count = len(api_metrics) * len(evaluation_run_config.dataset)
     _LOGGER.info(
         f"Computing metrics with a total of {api_request_count} Vertex online"
         " evaluation service requests."
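
The arithmetic behind this logging change, as a quick illustration (metric names and counts are made up): custom metrics are computed client-side, so they should no longer inflate the reported number of Vertex online evaluation service requests.

api_metrics = ["coherence", "fluency"]   # served by the online evaluation service
custom_metrics = ["my_local_metric"]     # computed locally, no API requests
dataset_rows = 10

# Before: (len(api_metrics) + len(custom_metrics)) * dataset_rows = 30 (overcounted)
# After:  len(api_metrics) * dataset_rows = 20
api_request_count = len(api_metrics) * dataset_rows
print(api_request_count)  # 20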
@@ -629,7 +628,8 @@ def evaluate(
     Raises:
       ValueError: If the metrics list is empty, or the prompt template is not
         provided for PairwiseMetric, or multiple baseline models are specified for
-        PairwiseMetric instances.
+        PairwiseMetric instances, or both model and dataset model response column
+        are present.
     """
 
     if not metrics:
@@ -655,6 +655,22 @@ def evaluate(
             constants.Dataset.REFERENCE_COLUMN
         )
 
+    if (
+        model
+        and evaluation_run_config.column_map.get(
+            constants.Dataset.MODEL_RESPONSE_COLUMN
+        )
+        in dataset.columns
+    ):
+        raise ValueError(
+            "The `model` parameter is specified, but the evaluation `dataset`"
+            f" contains model response column `{response_column_name}` to perform"
+            " bring-your-own-prediction(BYOP) evaluation. If you would like to"
+            " perform rapid evaluation using the dataset with the existing model"
+            f" response column `{response_column_name}`, please remove the"
+            " `model` input parameter."
+        )
+
     baseline_model = None
     pairwise_metric_exists = any(
         isinstance(metric, metrics_base.PairwiseMetric)
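
Restated outside the SDK internals, the new check amounts to the following. This is a hypothetical standalone helper for illustration only, not code from this change.

import pandas as pd

def check_model_vs_byop(model, dataset: pd.DataFrame, response_column_name: str = "response") -> None:
    # Reject the ambiguous combination: a model that would generate responses
    # AND a dataset column that already holds responses.
    if model is not None and response_column_name in dataset.columns:
        raise ValueError(
            "The `model` parameter is specified, but the evaluation `dataset` "
            f"contains model response column `{response_column_name}`. Remove the "
            "`model` input parameter to evaluate the existing responses."
        )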
