 from google.api_core import exceptions
 import vertexai
 from google.cloud.aiplatform import base
+from google.cloud.aiplatform import utils
 from google.cloud.aiplatform.metadata import metadata
 from vertexai import generative_models
 from vertexai.evaluation import _base as eval_base
 from vertexai.evaluation import _evaluation
 from vertexai.evaluation import constants
-from vertexai.evaluation import utils
+from vertexai.evaluation import utils as eval_utils
 from vertexai.evaluation.metrics import (
     _base as metrics_base,
 )
@@ -289,10 +290,10 @@ def __init__(
                 evaluation results.
         """
         self._raw_dataset = dataset
-        self._dataset = utils.load_dataset(dataset)
+        self._dataset = eval_utils.load_dataset(dataset)
         self._metrics = metrics
         self._experiment = experiment
-        self._metric_column_mapping = utils.initialize_metric_column_mapping(
+        self._metric_column_mapping = eval_utils.initialize_metric_column_mapping(
             metric_column_mapping, self._dataset
         )
         self.output_uri_prefix = output_uri_prefix
@@ -320,6 +321,7 @@ def _evaluate_with_experiment(
         experiment_run_name: Optional[str] = None,
         evaluation_service_qps: Optional[float] = None,
         retry_timeout: float = 120.0,
+        output_file_name: Optional[str] = None,
     ) -> EvalResult:
         """Runs an evaluation for the EvalTask with an experiment.

@@ -336,13 +338,19 @@ def _evaluate_with_experiment(
             evaluation_service_qps: The custom QPS limit for the evaluation service.
             retry_timeout: How long to keep retrying the evaluation requests for
                 the whole evaluation dataset, in seconds.
+            output_file_name: The file name with csv suffix to store the output
+                metrics_table to be tracked in the experiment run.

         Returns:
             The evaluation result.
         """
         self._validate_experiment_run()
         with vertexai.preview.start_run(experiment_run_name):
-            self._log_eval_experiment_param(model, prompt_template)
+            self._log_eval_experiment_param(
+                model=model,
+                prompt_template=prompt_template,
+                output_file_name=output_file_name,
+            )
             eval_result = _evaluation.evaluate(
                 dataset=self._dataset,
                 metrics=self._metrics,
@@ -413,6 +421,8 @@ def evaluate(
                 "`vertexai.init(experiment='experiment_name')`for logging this"
                 " evaluation run."
             )
+        if self.output_uri_prefix and not output_file_name:
+            output_file_name = f"eval_results_{utils.timestamped_unique_name()}.csv"
         self._verify_and_set_response_column_name(
             response_column_name=response_column_name,
             metric_column_mapping_key=constants.Dataset.MODEL_RESPONSE_COLUMN,
@@ -433,6 +443,7 @@ def evaluate(
                 experiment_run_name=experiment_run_name,
                 evaluation_service_qps=evaluation_service_qps,
                 retry_timeout=retry_timeout,
+                output_file_name=output_file_name,
             )
             metadata._experiment_tracker.set_experiment(
                 experiment=global_experiment_name,
@@ -449,6 +460,7 @@ def evaluate(
                 experiment_run_name=experiment_run_name,
                 evaluation_service_qps=evaluation_service_qps,
                 retry_timeout=retry_timeout,
+                output_file_name=output_file_name,
             )
             metadata._experiment_tracker.reset()
         elif not self._experiment and global_experiment_name:
@@ -458,6 +470,7 @@ def evaluate(
                 experiment_run_name=experiment_run_name,
                 evaluation_service_qps=evaluation_service_qps,
                 retry_timeout=retry_timeout,
+                output_file_name=output_file_name,
             )
         else:
             eval_result = _evaluation.evaluate(
@@ -490,7 +503,7 @@ def evaluate(
         if isinstance(self._raw_dataset, str):
             dataset_uri = self._raw_dataset

-        utils.upload_evaluation_results(
+        eval_utils.upload_evaluation_results(
             eval_result,
             self.output_uri_prefix,
             output_file_name,
@@ -513,35 +526,41 @@ def _log_eval_experiment_param(
         self,
         model: Optional[Union[GenerativeModel, Callable[[str], str]]] = None,
         prompt_template: Optional[str] = None,
+        output_file_name: Optional[str] = None,
     ) -> None:
         """Logs variable input parameters of an evaluation to an experiment run."""
-        model_metadata = {}
+        eval_metadata = {}

         if prompt_template is not None:
-            model_metadata.update({"prompt_template": prompt_template})
+            eval_metadata.update({"prompt_template": prompt_template})

         if isinstance(model, GenerativeModel):
-            model_metadata.update(
+            eval_metadata.update(
                 {
                     "model_name": model._model_name,
                 }
             )

             if model._generation_config and isinstance(model._generation_config, dict):
-                model_metadata.update(**model._generation_config)
+                eval_metadata.update(**model._generation_config)

             if model._safety_settings and isinstance(model._safety_settings, dict):
                 safety_settings = model._safety_settings
                 safety_settings_as_str = {
                     category.name: threshold.name
                     for category, threshold in safety_settings.items()
                 }
-                model_metadata.update(safety_settings_as_str)
+                eval_metadata.update(safety_settings_as_str)
+
+        if self.output_uri_prefix and output_file_name:
+            eval_metadata.update(
+                {"output_file": self.output_uri_prefix + "/" + output_file_name}
+            )

-        if model_metadata:
-            _LOGGER.info(f"Logging Eval Experiment metadata: {model_metadata}")
+        if eval_metadata:
+            _LOGGER.info(f"Logging Eval Experiment metadata: {eval_metadata}")
             try:
-                vertexai.preview.log_params(model_metadata)
+                vertexai.preview.log_params(eval_metadata)
             except (ValueError, TypeError) as e:
                 _LOGGER.warning(f"Experiment metadata logging failed: {str(e)}")

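For context, a minimal usage sketch of the behavior this diff adds (not part of the commit). The project, bucket, experiment, and run names below are illustrative assumptions, and `exact_match` is used only as a stand-in metric. When `output_uri_prefix` is set and `output_file_name` is omitted, the change generates `eval_results_<timestamped_unique_name>.csv` and logs `<output_uri_prefix>/<output_file_name>` under the `output_file` experiment parameter.

```python
import pandas as pd

import vertexai
from vertexai.evaluation import EvalTask
from vertexai.generative_models import GenerativeModel

# Hypothetical project, bucket, and experiment names for illustration only.
vertexai.init(project="my-project", location="us-central1")

eval_dataset = pd.DataFrame(
    {
        "prompt": ["Say hello.", "Say goodbye."],
        "reference": ["hello", "goodbye"],
    }
)

eval_task = EvalTask(
    dataset=eval_dataset,
    metrics=["exact_match"],
    experiment="my-eval-experiment",
    output_uri_prefix="gs://my-bucket/eval-results",
)

# output_file_name is omitted, so evaluate() falls back to a timestamped
# "eval_results_*.csv" name, uploads the metrics table under output_uri_prefix,
# and records "output_file" in the experiment run's parameters.
result = eval_task.evaluate(
    model=GenerativeModel("gemini-1.5-flash"),
    experiment_run_name="run-with-output-file",
)
print(result.summary_metrics)
```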