
Commit 61b8e4d

Remove result_df from mlflow.genai.evaluation result (mlflow#15896)
Signed-off-by: B-Step62 <[email protected]>
Signed-off-by: Yuki Watanabe <[email protected]>
1 parent b2fc238 commit 61b8e4d

File tree

3 files changed: +12 −44 lines changed
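
In short: `mlflow.genai.evaluate` no longer wraps its output in a genai-specific dataclass carrying a `result_df` DataFrame; it returns the standard `mlflow.models.EvaluationResult` directly. A minimal migration sketch for existing callers, assuming the per-row table keeps the "eval_results" key shown in this diff:

    # Before this commit: genai-specific result object
    df = results.result_df
    # After this commit: standard mlflow.models.EvaluationResult
    df = results.tables["eval_results"]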

mlflow/genai/evaluation/base.py

Lines changed: 5 additions & 22 deletions

@@ -1,6 +1,5 @@
 import logging
 import warnings
-from dataclasses import dataclass
 from typing import TYPE_CHECKING, Any, Callable, Optional
 
 import mlflow
@@ -14,6 +13,7 @@
 from mlflow.genai.scorers.validation import valid_data_for_builtin_scorers, validate_scorers
 from mlflow.genai.utils.trace_utils import convert_predict_fn
 from mlflow.models.evaluation.base import (
+    EvaluationResult,
     _is_model_deployment_endpoint_uri,
 )
 from mlflow.utils.annotations import experimental
@@ -22,24 +22,10 @@
 if TYPE_CHECKING:
     from genai.evaluation.utils import EvaluationDatasetTypes
 
-try:
-    # `pandas` is not required for `mlflow-skinny`.
-    import pandas as pd
-except ImportError:
-    pass
-
 
 logger = logging.getLogger(__name__)
 
 
-@experimental
-@dataclass
-class EvaluationResult:
-    run_id: str
-    metrics: dict[str, float]
-    result_df: "pd.DataFrame"
-
-
 @experimental
 def evaluate(
     data: "EvaluationDatasetTypes",
@@ -218,6 +204,9 @@ def predict_fn(question: str) -> str:
             the evaluation results. Can be also set globally via the
             :py:func:`mlflow.set_active_model` function.
 
+    Returns:
+        An :py:class:`~mlflow.models.EvaluationResult` object.
+
     Note:
         This function is only supported on Databricks. The tracking URI must be
         set to Databricks.
@@ -288,7 +277,7 @@ def predict_fn(question: str) -> str:
         module="mlflow.data.evaluation_dataset",
     )
 
-    result = mlflow.models.evaluate(
+    return mlflow.models.evaluate(
         model=predict_fn,
         data=data,
         evaluator_config=evaluation_config,
@@ -298,12 +287,6 @@ def predict_fn(question: str) -> str:
         _called_from_genai_evaluate=True,
    )
 
-    return EvaluationResult(
-        run_id=result._run_id,
-        metrics=result.metrics,
-        result_df=result.tables["eval_results"],
-    )
-
 
 @experimental
 def to_predict_fn(endpoint_uri: str) -> Callable:
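
For illustration, here is how a caller sees the change end to end. This is a sketch only: the scorer's (inputs, outputs) signature mirrors tests/genai/test_scorer.py, while `exact_match`, `eval_data`, and their schema are assumptions (the real accepted shapes are defined by EvaluationDatasetTypes), and per the docstring the API requires a Databricks tracking URI:

    import mlflow
    from mlflow.genai.scorers import scorer  # decorator; import path assumed from the MLflow 3.x layout

    @scorer
    def exact_match(inputs, outputs):
        # Toy scorer: 1.0 when the output matches an expected answer stashed in the inputs.
        return float(outputs == inputs.get("expected"))

    # Hypothetical dataset row; the "inputs"/"outputs" keys here are an assumption.
    eval_data = [{"inputs": {"question": "2 + 2?", "expected": "4"}, "outputs": "4"}]

    results = mlflow.genai.evaluate(data=eval_data, scorers=[exact_match])

    print(results.metrics)                  # aggregate metrics, unchanged by this commit
    table = results.tables["eval_results"]  # per-row results, formerly `results.result_df`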

mlflow/models/evaluation/base.py

Lines changed: 7 additions & 0 deletions

@@ -670,6 +670,13 @@ def artifacts(self) -> dict[str, "mlflow.models.EvaluationArtifact"]:
         """
         return self._artifacts
 
+    @property
+    def run_id(self) -> str:
+        """
+        The ID of the MLflow Run to which the evaluation results were logged.
+        """
+        return self._run_id
+
     @property
     def tables(self) -> dict[str, "pd.DataFrame"]:
         """

tests/genai/test_scorer.py

Lines changed: 0 additions & 22 deletions

@@ -204,30 +204,8 @@ def dummy_scorer(inputs, outputs):
         data=sample_data,
         scorers=[dummy_scorer],
     )
-
     assert any("metric/dummy_scorer" in metric for metric in results.metrics.keys())
 
-    dummy_scorer_cols = [
-        col for col in results.result_df.keys() if "dummy_scorer" in col and "value" in col
-    ]
-    dummy_scorer_values = set()
-    for col in dummy_scorer_cols:
-        for _val in results.result_df[col]:
-            dummy_scorer_values.add(_val)
-
-    scorer_return_values = set()
-    if isinstance(scorer_return, list):
-        for _assessment in scorer_return:
-            scorer_return_values.add(_assessment.feedback.value)
-    elif isinstance(scorer_return, Assessment):
-        scorer_return_values.add(scorer_return.feedback.value)
-    elif isinstance(scorer_return, mlflow.evaluation.Assessment):
-        scorer_return_values.add(scorer_return.value)
-    else:
-        scorer_return_values.add(scorer_return)
-
-    assert dummy_scorer_values == scorer_return_values
-
 
 def test_scorer_returns_feedback_with_error(sample_data):
     @scorer
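
If equivalent coverage of scorer values is ever wanted back, the deleted assertion could be rewritten against the standard result object. A sketch, assuming the eval table keeps the same loose "dummy_scorer"/"value" column naming the old test matched on:

    eval_table = results.tables["eval_results"]  # replaces the removed `results.result_df`
    value_cols = [c for c in eval_table.columns if "dummy_scorer" in c and "value" in c]
    dummy_scorer_values = {v for c in value_cols for v in eval_table[c]}
    assert dummy_scorer_values == scorer_return_values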
