Skip to content

Commit 4742a87

Browse files
jsondai and copybara-github
authored and committed
feat: add 12 model-based pointwise metric classes to vertexai.preview.evaluation.metrics
PiperOrigin-RevId: 644484669
1 parent 04e07db commit 4742a87

14 files changed

+525
-1
lines changed

tests/unit/vertexai/test_evaluation.py

+70
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,9 @@
3131
from vertexai.preview.evaluation import _base as eval_base
3232
from vertexai.preview.evaluation import _evaluation
3333
from vertexai.preview.evaluation import utils
34+
from vertexai.preview.evaluation.metrics import (
35+
_summarization_quality,
36+
)
3437
from vertexai.preview.evaluation.metrics import (
3538
_pairwise_summarization_quality,
3639
)
@@ -318,6 +321,73 @@ def test_compute_pointwise_metrics(self, api_transport):
318321
0.5,
319322
]
320323

324+
@pytest.mark.parametrize("api_transport", ["grpc", "rest"])
def test_compute_pointwise_metrics_with_custom_metric_spec(self, api_transport):
    """Evaluate with a SummarizationQuality metric object and check the results.

    The evaluation service client is patched so that `evaluate_instances`
    yields the canned summarization-quality results; the test then checks the
    summary statistics, the set of columns in the metrics table, and the
    per-row score / explanation / confidence values.
    """
    aiplatform.init(
        project=_TEST_PROJECT,
        location=_TEST_LOCATION,
        api_transport=api_transport,
    )

    # Two-row dataset without a "response" column; responses come from the
    # mocked model below.
    dataset_columns = {
        "context": ["test", "context"],
        "instruction": ["test", "instruction"],
        "reference": ["test", "reference"],
    }
    eval_dataset = pd.DataFrame(dataset_columns)

    fake_model = mock.create_autospec(
        generative_models.GenerativeModel, instance=True
    )
    fake_model.generate_content.return_value = _MOCK_MODEL_INFERENCE_RESPONSE
    fake_model._model_name = "publishers/google/model/gemini-1.0-pro"

    summarization_metric = _summarization_quality.SummarizationQuality(
        use_reference=True,
    )
    eval_task = evaluation.EvalTask(
        dataset=eval_dataset, metrics=[summarization_metric]
    )

    service_responses = _MOCK_SUMMARIZATION_QUALITY_RESULT
    with mock.patch.object(
        target=gapic_evaluation_services.EvaluationServiceAsyncClient,
        attribute="evaluate_instances",
        side_effect=service_responses,
    ):
        test_result = eval_task.evaluate(
            model=fake_model,
            prompt_template="{instruction} test prompt template {context}",
        )

    summary = test_result.summary_metrics
    assert summary["row_count"] == 2
    assert summary["summarization_quality/mean"] == 4.5
    assert summary["summarization_quality/std"] == pytest.approx(0.7, 0.1)

    expected_columns = {
        "context",
        "instruction",
        "reference",
        "completed_prompt",
        "response",
        "summarization_quality",
        "summarization_quality/explanation",
        "summarization_quality/confidence",
    }
    assert set(test_result.metrics_table.columns.values) == expected_columns

    expected_scores = [5, 4]
    assert (
        list(test_result.metrics_table["summarization_quality"].values)
        == expected_scores
    )

    expected_explanations = ["explanation", "explanation"]
    assert (
        list(test_result.metrics_table["summarization_quality/explanation"].values)
        == expected_explanations
    )

    expected_confidences = [1.0, 0.5]
    assert (
        list(test_result.metrics_table["summarization_quality/confidence"].values)
        == expected_confidences
    )
390+
321391
@pytest.mark.parametrize("api_transport", ["grpc", "rest"])
322392
def test_compute_pairwise_metrics_with_model_inference(self, api_transport):
323393
aiplatform.init(

vertexai/preview/evaluation/metrics/__init__.py

+69-1
Original file line numberDiff line numberDiff line change
@@ -16,16 +16,84 @@
1616
#
1717
"""Evaluation Metrics Module."""
1818

19+
from vertexai.preview.evaluation.metrics import _base
20+
from vertexai.preview.evaluation.metrics import _coherence
21+
from vertexai.preview.evaluation.metrics import _fluency
22+
from vertexai.preview.evaluation.metrics import _fulfillment
23+
from vertexai.preview.evaluation.metrics import _groundedness
1924
from vertexai.preview.evaluation.metrics import (
20-
_base,
25+
_pairwise_question_answering_quality,
2126
)
27+
from vertexai.preview.evaluation.metrics import (
28+
_pairwise_summarization_quality,
29+
)
30+
from vertexai.preview.evaluation.metrics import (
31+
_question_answering_correctness,
32+
)
33+
from vertexai.preview.evaluation.metrics import (
34+
_question_answering_helpfulness,
35+
)
36+
from vertexai.preview.evaluation.metrics import (
37+
_question_answering_quality,
38+
)
39+
from vertexai.preview.evaluation.metrics import (
40+
_question_answering_relevance,
41+
)
42+
from vertexai.preview.evaluation.metrics import _safety
43+
from vertexai.preview.evaluation.metrics import (
44+
_summarization_helpfulness,
45+
)
46+
from vertexai.preview.evaluation.metrics import (
47+
_summarization_quality,
48+
)
49+
from vertexai.preview.evaluation.metrics import (
50+
_summarization_verbosity,
51+
)
52+
2253

2354
# Re-exports from the `_base` module.
CustomMetric = _base.CustomMetric
PairwiseMetric = _base.PairwiseMetric
make_metric = _base.make_metric

# Pointwise metric classes, one per private module.
Coherence = _coherence.Coherence
Fluency = _fluency.Fluency
Safety = _safety.Safety
Groundedness = _groundedness.Groundedness
Fulfillment = _fulfillment.Fulfillment

# Summarization metric classes.
SummarizationQuality = _summarization_quality.SummarizationQuality
SummarizationHelpfulness = _summarization_helpfulness.SummarizationHelpfulness
SummarizationVerbosity = _summarization_verbosity.SummarizationVerbosity

# Question-answering metric classes.
QuestionAnsweringQuality = _question_answering_quality.QuestionAnsweringQuality
QuestionAnsweringRelevance = (
    _question_answering_relevance.QuestionAnsweringRelevance
)
QuestionAnsweringHelpfulness = (
    _question_answering_helpfulness.QuestionAnsweringHelpfulness
)
QuestionAnsweringCorrectness = (
    _question_answering_correctness.QuestionAnsweringCorrectness
)

# Pairwise metric classes.
PairwiseSummarizationQuality = (
    _pairwise_summarization_quality.PairwiseSummarizationQuality
)
PairwiseQuestionAnsweringQuality = (
    _pairwise_question_answering_quality.PairwiseQuestionAnsweringQuality
)

# Names exported by `from vertexai.preview.evaluation.metrics import *`.
__all__ = [
    "CustomMetric",
    "PairwiseMetric",
    "make_metric",
    "Coherence",
    "Fluency",
    "Safety",
    "Groundedness",
    "Fulfillment",
    "SummarizationQuality",
    "SummarizationHelpfulness",
    "SummarizationVerbosity",
    "QuestionAnsweringQuality",
    "QuestionAnsweringRelevance",
    "QuestionAnsweringHelpfulness",
    "QuestionAnsweringCorrectness",
    "PairwiseSummarizationQuality",
    "PairwiseQuestionAnsweringQuality",
]
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
# -*- coding: utf-8 -*-
2+
3+
# Copyright 2024 Google LLC
4+
#
5+
# Licensed under the Apache License, Version 2.0 (the "License");
6+
# you may not use this file except in compliance with the License.
7+
# You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS,
13+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
#
17+
from typing import Optional
18+
from vertexai.preview.evaluation import constants
19+
from vertexai.preview.evaluation.metrics import _base
20+
21+
22+
class Coherence(_base._ModelBasedMetric):
    """Model-based pointwise metric that scores Coherence.

    The metric is identified to the base class by
    `constants.Metric.COHERENCE`.
    """

    _metric_name = constants.Metric.COHERENCE

    def __init__(self, *, version: Optional[int] = None) -> None:
        """Initialize the Coherence metric.

        Args:
            version: Optional metric version, forwarded unchanged to the base
                class. Defaults to None.
        """
        super().__init__(metric=Coherence._metric_name, version=version)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
# -*- coding: utf-8 -*-
2+
3+
# Copyright 2024 Google LLC
4+
#
5+
# Licensed under the Apache License, Version 2.0 (the "License");
6+
# you may not use this file except in compliance with the License.
7+
# You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS,
13+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
#
17+
from typing import Optional
18+
from vertexai.preview.evaluation import constants
19+
from vertexai.preview.evaluation.metrics import _base
20+
21+
22+
class Fluency(_base._ModelBasedMetric):
    """Model-based pointwise metric that scores Fluency.

    The metric is identified to the base class by
    `constants.Metric.FLUENCY`.
    """

    _metric_name = constants.Metric.FLUENCY

    def __init__(self, *, version: Optional[int] = None) -> None:
        """Initialize the Fluency metric.

        Args:
            version: Optional metric version, forwarded unchanged to the base
                class. Defaults to None.
        """
        super().__init__(metric=Fluency._metric_name, version=version)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
# -*- coding: utf-8 -*-
2+
3+
# Copyright 2024 Google LLC
4+
#
5+
# Licensed under the Apache License, Version 2.0 (the "License");
6+
# you may not use this file except in compliance with the License.
7+
# You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS,
13+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
#
17+
from typing import Optional
18+
from vertexai.preview.evaluation import constants
19+
from vertexai.preview.evaluation.metrics import _base
20+
21+
22+
class Fulfillment(_base._ModelBasedMetric):
    """Model-based pointwise metric that scores Fulfillment.

    The metric is identified to the base class by
    `constants.Metric.FULFILLMENT`.
    """

    _metric_name = constants.Metric.FULFILLMENT

    def __init__(self, *, version: Optional[int] = None) -> None:
        """Initialize the Fulfillment metric.

        Args:
            version: Optional metric version, forwarded unchanged to the base
                class. Defaults to None.
        """
        super().__init__(metric=Fulfillment._metric_name, version=version)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
# -*- coding: utf-8 -*-
2+
3+
# Copyright 2024 Google LLC
4+
#
5+
# Licensed under the Apache License, Version 2.0 (the "License");
6+
# you may not use this file except in compliance with the License.
7+
# You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS,
13+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
#
17+
from typing import Optional
18+
from vertexai.preview.evaluation import constants
19+
from vertexai.preview.evaluation.metrics import _base
20+
21+
22+
class Groundedness(_base._ModelBasedMetric):
    """Model-based pointwise metric that scores Groundedness.

    The metric is identified to the base class by
    `constants.Metric.GROUNDEDNESS`.
    """

    _metric_name = constants.Metric.GROUNDEDNESS

    def __init__(self, *, version: Optional[int] = None) -> None:
        """Initialize the Groundedness metric.

        Args:
            version: Optional metric version, forwarded unchanged to the base
                class. Defaults to None.
        """
        super().__init__(metric=Groundedness._metric_name, version=version)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
# -*- coding: utf-8 -*-
2+
3+
# Copyright 2024 Google LLC
4+
#
5+
# Licensed under the Apache License, Version 2.0 (the "License");
6+
# you may not use this file except in compliance with the License.
7+
# You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS,
13+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
#
17+
18+
from typing import Optional
19+
from vertexai.preview.evaluation import constants
20+
from vertexai.preview.evaluation.metrics import _base
21+
22+
23+
class QuestionAnsweringCorrectness(_base._ModelBasedMetric):
    """Model-based pointwise metric that scores Question Answering Correctness.

    The metric is identified to the base class by
    `constants.Metric.QUESTION_ANSWERING_CORRECTNESS`.
    """

    _metric_name = constants.Metric.QUESTION_ANSWERING_CORRECTNESS

    def __init__(
        self,
        *,
        use_reference: bool = True,
        version: Optional[int] = None,
    ) -> None:
        """Initialize the Question Answering Correctness metric.

        Args:
            use_reference: Flag forwarded unchanged to the base class.
                Defaults to True for this metric.
            version: Optional metric version, forwarded unchanged to the base
                class. Defaults to None.
        """
        super().__init__(
            metric=QuestionAnsweringCorrectness._metric_name,
            use_reference=use_reference,
            version=version,
        )
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
# -*- coding: utf-8 -*-
2+
3+
# Copyright 2024 Google LLC
4+
#
5+
# Licensed under the Apache License, Version 2.0 (the "License");
6+
# you may not use this file except in compliance with the License.
7+
# You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS,
13+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
#
17+
18+
from typing import Optional
19+
from vertexai.preview.evaluation import constants
20+
from vertexai.preview.evaluation.metrics import _base
21+
22+
23+
class QuestionAnsweringHelpfulness(_base._ModelBasedMetric):
    """Model-based pointwise metric that scores Question Answering Helpfulness.

    The metric is identified to the base class by
    `constants.Metric.QUESTION_ANSWERING_HELPFULNESS`.
    """

    _metric_name = constants.Metric.QUESTION_ANSWERING_HELPFULNESS

    def __init__(
        self,
        *,
        use_reference: bool = False,
        version: Optional[int] = None,
    ) -> None:
        """Initialize the Question Answering Helpfulness metric.

        Args:
            use_reference: Flag forwarded unchanged to the base class.
                Defaults to False for this metric.
            version: Optional metric version, forwarded unchanged to the base
                class. Defaults to None.
        """
        super().__init__(
            metric=QuestionAnsweringHelpfulness._metric_name,
            use_reference=use_reference,
            version=version,
        )
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
# -*- coding: utf-8 -*-
2+
3+
# Copyright 2024 Google LLC
4+
#
5+
# Licensed under the Apache License, Version 2.0 (the "License");
6+
# you may not use this file except in compliance with the License.
7+
# You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS,
13+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
#
17+
18+
from typing import Optional
19+
from vertexai.preview.evaluation import constants
20+
from vertexai.preview.evaluation.metrics import _base
21+
22+
23+
class QuestionAnsweringQuality(_base._ModelBasedMetric):
    """Model-based pointwise metric that scores Question Answering Quality.

    The metric is identified to the base class by
    `constants.Metric.QUESTION_ANSWERING_QUALITY`.
    """

    _metric_name = constants.Metric.QUESTION_ANSWERING_QUALITY

    def __init__(
        self,
        *,
        use_reference: bool = False,
        version: Optional[int] = None,
    ) -> None:
        """Initialize the Question Answering Quality metric.

        Args:
            use_reference: Flag forwarded unchanged to the base class.
                Defaults to False for this metric.
            version: Optional metric version, forwarded unchanged to the base
                class. Defaults to None.
        """
        super().__init__(
            metric=QuestionAnsweringQuality._metric_name,
            use_reference=use_reference,
            version=version,
        )

0 commit comments

Comments
 (0)