|
317 | 317 | )
|
318 | 318 | ),
|
319 | 319 | )
|
# Canned pairwise-metric responses for a mocked evaluation service: two
# separate but identical responses, each choosing BASELINE. When the tuple is
# passed as a mock ``side_effect``, one entry is returned per call.
_MOCK_PAIRWISE_RESULT = tuple(
    gapic_evaluation_service_types.EvaluateInstancesResponse(
        pairwise_metric_result=gapic_evaluation_service_types.PairwiseMetricResult(
            pairwise_choice=gapic_evaluation_service_types.PairwiseChoice.BASELINE,
            explanation="explanation",
        )
    )
    for _ in range(2)
)
320 | 334 | _MOCK_SUMMARIZATION_QUALITY_RESULT = (
|
321 | 335 | gapic_evaluation_service_types.EvaluateInstancesResponse(
|
322 | 336 | pointwise_metric_result=gapic_evaluation_service_types.PointwiseMetricResult(
|
@@ -1216,6 +1230,40 @@ def test_evaluate_baseline_response_column_and_baseline_model_provided(self):
|
1216 | 1230 | test_eval_task.evaluate(model=mock.MagicMock())
|
1217 | 1231 | _TEST_PAIRWISE_METRIC._baseline_model = None
|
1218 | 1232 |
|
def test_evaluate_baseline_model_provided_but_no_baseline_response_column(self):
    """Evaluate a pairwise metric with a baseline model attached to the metric.

    The dataset is a deep copy of ``_TEST_EVAL_DATASET_WITHOUT_RESPONSE``.
    Candidate and baseline models are autospec'd ``GenerativeModel`` mocks
    that return the same canned inference response, and the evaluation
    service's ``evaluate_instances`` is patched to yield
    ``_MOCK_PAIRWISE_RESULT`` one response per call. The test passes when
    the summary reports a ``row_count`` of 2.
    """
    mock_baseline_model = mock.create_autospec(
        generative_models.GenerativeModel, instance=True
    )
    mock_baseline_model.generate_content.return_value = (
        _MOCK_MODEL_INFERENCE_RESPONSE
    )
    mock_baseline_model._model_name = "publishers/google/model/gemini-pro"

    # _TEST_PAIRWISE_METRIC is shared module-level state. Everything after
    # this mutation runs under try/finally so a failure cannot leak the
    # mock baseline model into subsequent tests.
    _TEST_PAIRWISE_METRIC._baseline_model = mock_baseline_model
    try:
        mock_candidate_model = mock.create_autospec(
            generative_models.GenerativeModel, instance=True
        )
        mock_candidate_model.generate_content.return_value = (
            _MOCK_MODEL_INFERENCE_RESPONSE
        )
        mock_candidate_model._model_name = (
            "publishers/google/model/gemini-1.0-pro"
        )

        # Deep copy so the evaluation cannot modify the shared fixture.
        eval_dataset = _TEST_EVAL_DATASET_WITHOUT_RESPONSE.copy(deep=True)
        test_eval_task = EvalTask(
            dataset=eval_dataset,
            metrics=[_TEST_PAIRWISE_METRIC],
        )
        with mock.patch.object(
            target=gapic_evaluation_services.EvaluationServiceClient,
            attribute="evaluate_instances",
            side_effect=_MOCK_PAIRWISE_RESULT,
        ):
            test_result = test_eval_task.evaluate(
                model=mock_candidate_model,
            )
    finally:
        _TEST_PAIRWISE_METRIC._baseline_model = None

    assert test_result.summary_metrics["row_count"] == 2
| 1266 | + |
1219 | 1267 | def test_evaluate_response_column_and_model_not_provided(self):
|
1220 | 1268 | test_eval_task = EvalTask(
|
1221 | 1269 | dataset=_TEST_EVAL_DATASET_SINGLE,
|
|
0 commit comments