@@ -154,6 +154,49 @@ def test_inference_with_string_model_success(
154
154
}
155
155
),
156
156
)
157
+ assert inference_result .candidate_name == "gemini-pro"
158
+ assert inference_result .gcs_source is None
159
+
160
@mock.patch.object(_evals_utils, "EvalDatasetLoader")
def test_inference_with_callable_model_sets_candidate_name(
    self, mock_eval_dataset_loader
):
    """Check that a named callable model supplies the candidate name.

    Runs inference with a plain function as the model and asserts that the
    result's ``candidate_name`` equals the function's name and that no GCS
    source is attached.
    """
    mock_df = pd.DataFrame({"prompt": ["test prompt"]})
    # The patched loader hands back the same rows as the in-memory frame.
    loader_instance = mock_eval_dataset_loader.return_value
    loader_instance.load.return_value = mock_df.to_dict(orient="records")

    def my_model_fn(contents):
        # The function name is what the assertion below compares against.
        return "callable response"

    inference_result = self.client.evals.run_inference(
        model=my_model_fn,
        src=mock_df,
    )

    assert inference_result.candidate_name == "my_model_fn"
    assert inference_result.gcs_source is None
178
+
179
@mock.patch.object(_evals_utils, "EvalDatasetLoader")
def test_inference_with_lambda_model_candidate_name_is_none(
    self, mock_eval_dataset_loader
):
    """Check the candidate name recorded when the model is a lambda.

    Accepts either ``"<lambda>"`` or ``None`` for ``candidate_name`` and
    asserts that no GCS source is attached.
    """
    mock_df = pd.DataFrame({"prompt": ["test prompt"]})
    # The patched loader hands back the same rows as the in-memory frame.
    loader_instance = mock_eval_dataset_loader.return_value
    loader_instance.load.return_value = mock_df.to_dict(orient="records")

    inference_result = self.client.evals.run_inference(
        model=lambda x: "lambda response",  # pylint: disable=unnecessary-lambda
        src=mock_df,
    )

    # A lambda's __name__ is "<lambda>", so that is the value to expect if the
    # code under test reads the name via getattr(model, "__name__", None), as
    # the original note on this test says it does. None is still tolerated in
    # case no name is recorded.
    # NOTE(review): the test name says "is_none" but "<lambda>" is accepted
    # too -- confirm the intended contract and tighten the assertion.
    recorded_name = inference_result.candidate_name
    assert recorded_name is None or recorded_name == "<lambda>"
    assert inference_result.gcs_source is None
157
200
158
201
@mock .patch .object (_evals_utils , "EvalDatasetLoader" )
159
202
def test_inference_with_callable_model_success (self , mock_eval_dataset_loader ):
@@ -179,6 +222,8 @@ def mock_model_fn(contents):
179
222
}
180
223
),
181
224
)
225
+ assert inference_result .candidate_name == "mock_model_fn"
226
+ assert inference_result .gcs_source is None
182
227
183
228
@mock .patch .object (_evals_common , "Models" )
184
229
@mock .patch .object (_evals_utils , "EvalDatasetLoader" )
@@ -224,6 +269,8 @@ def test_inference_with_prompt_template(
224
269
}
225
270
),
226
271
)
272
+ assert inference_result .candidate_name == "gemini-pro"
273
+ assert inference_result .gcs_source is None
227
274
228
275
@mock .patch .object (_evals_common , "Models" )
229
276
@mock .patch .object (_evals_utils , "EvalDatasetLoader" )
@@ -273,6 +320,10 @@ def test_inference_with_gcs_destination(
273
320
pd .testing .assert_frame_equal (
274
321
inference_result .eval_dataset_df , expected_df_to_save
275
322
)
323
+ assert inference_result .candidate_name == "gemini-pro"
324
+ assert inference_result .gcs_source == vertexai_genai_types .GcsSource (
325
+ uris = [gcs_dest_path ]
326
+ )
276
327
277
328
@mock .patch .object (_evals_common , "Models" )
278
329
@mock .patch .object (_evals_utils , "EvalDatasetLoader" )
@@ -322,6 +373,8 @@ def test_inference_with_local_destination(
322
373
}
323
374
)
324
375
pd .testing .assert_frame_equal (inference_result .eval_dataset_df , expected_df )
376
+ assert inference_result .candidate_name == "gemini-pro"
377
+ assert inference_result .gcs_source is None
325
378
326
379
@mock .patch .object (_evals_common , "Models" )
327
380
@mock .patch .object (_evals_utils , "EvalDatasetLoader" )
@@ -405,6 +458,8 @@ def test_inference_from_request_column_save_locally(
405
458
expected_records , key = lambda x : x ["request" ]
406
459
)
407
460
os .remove (local_dest_path )
461
+ assert inference_result .candidate_name == "gemini-pro"
462
+ assert inference_result .gcs_source is None
408
463
409
464
@mock .patch .object (_evals_common , "Models" )
410
465
def test_inference_from_local_jsonl_file (self , mock_models ):
@@ -478,6 +533,8 @@ def test_inference_from_local_jsonl_file(self, mock_models):
478
533
any_order = True ,
479
534
)
480
535
os .remove (local_src_path )
536
+ assert inference_result .candidate_name == "gemini-pro"
537
+ assert inference_result .gcs_source is None
481
538
482
539
@mock .patch .object (_evals_common , "Models" )
483
540
def test_inference_from_local_csv_file (self , mock_models ):
@@ -548,6 +605,8 @@ def test_inference_from_local_csv_file(self, mock_models):
548
605
any_order = True ,
549
606
)
550
607
os .remove (local_src_path )
608
+ assert inference_result .candidate_name == "gemini-pro"
609
+ assert inference_result .gcs_source is None
551
610
552
611
@mock .patch .object (_evals_common , "Models" )
553
612
@mock .patch .object (_evals_utils , "EvalDatasetLoader" )
@@ -719,6 +778,8 @@ def mock_generate_content_logic(*args, **kwargs):
719
778
expected_df .sort_values (by = "id" ).reset_index (drop = True ),
720
779
check_dtype = False ,
721
780
)
781
+ assert inference_result .candidate_name == "gemini-pro"
782
+ assert inference_result .gcs_source is None
722
783
723
784
@mock .patch .object (_evals_common , "Models" )
724
785
@mock .patch .object (_evals_utils , "EvalDatasetLoader" )
@@ -794,6 +855,8 @@ def test_inference_with_multimodal_content(
794
855
}
795
856
),
796
857
)
858
+ assert inference_result .candidate_name == "gemini-pro"
859
+ assert inference_result .gcs_source is None
797
860
798
861
799
862
class TestMetricPromptBuilder :
@@ -3295,3 +3358,76 @@ def test_execute_evaluation_multiple_datasets(
3295
3358
assert summary_metric .mean_score == 1.0
3296
3359
3297
3360
assert mock_eval_dependencies ["mock_evaluate_instances" ].call_count == 2
3361
+
3362
def test_execute_evaluation_deduplicates_candidate_names(
    self, mock_api_client_fixture, mock_eval_dependencies
):
    """Tests that duplicate candidate names are indexed.

    Three single-row datasets are evaluated, two of which share the
    candidate name "gemini-pro". The result metadata is expected to suffix
    the repeated name with " #1" / " #2" and leave the unique name as is.
    """
    # (candidate_name, response) per dataset; prompt and reference are shared.
    candidate_specs = [
        ("gemini-pro", "r1"),
        ("gemini-flash", "r2"),
        ("gemini-pro", "r3"),
    ]
    datasets = [
        vertexai_genai_types.EvaluationDataset(
            eval_dataset_df=pd.DataFrame(
                [{"prompt": "p1", "response": response, "reference": "ref1"}]
            ),
            candidate_name=name,
        )
        for name, response in candidate_specs
    ]

    # Every evaluate_instances call reports a perfect exact-match score.
    perfect_match = vertexai_genai_types.EvaluateInstancesResponse(
        exact_match_results=vertexai_genai_types.ExactMatchResults(
            exact_match_metric_values=[
                vertexai_genai_types.ExactMatchMetricValue(score=1.0)
            ]
        )
    )
    mock_eval_dependencies["mock_evaluate_instances"].return_value = perfect_match

    result = _evals_common._execute_evaluation(
        api_client=mock_api_client_fixture,
        dataset=datasets,
        metrics=[vertexai_genai_types.Metric(name="exact_match")],
    )

    expected_names = ["gemini-pro #1", "gemini-flash", "gemini-pro #2"]
    assert result.metadata.candidate_names == expected_names
3406
+
3407
@mock.patch("vertexai._genai._evals_common.datetime")
def test_execute_evaluation_adds_creation_timestamp(
    self, mock_datetime, mock_api_client_fixture, mock_eval_dependencies
):
    """Tests that creation_timestamp is added to the result metadata.

    The ``datetime`` name inside ``_evals_common`` is patched so that
    ``datetime.datetime.now()`` returns a fixed UTC instant, which the
    result metadata is then expected to carry.
    """
    # Imported locally: the real module is needed to build the fixed instant,
    # while the patched one lives only inside _evals_common.
    import datetime

    frozen_now = datetime.datetime(
        year=2025, month=6, day=18, hour=12, tzinfo=datetime.timezone.utc
    )
    mock_datetime.datetime.now.return_value = frozen_now

    result = _evals_common._execute_evaluation(
        api_client=mock_api_client_fixture,
        dataset=vertexai_genai_types.EvaluationDataset(
            eval_dataset_df=pd.DataFrame(
                [{"prompt": "p", "response": "r", "reference": "r"}]
            )
        ),
        metrics=[vertexai_genai_types.Metric(name="exact_match")],
    )

    metadata = result.metadata
    assert metadata is not None
    assert metadata.creation_timestamp == frozen_now
0 commit comments