Skip to content

Commit 905c766

Browse files
vertex-sdk-botcopybara-github
authored and committed
feat: Fix file type mismatch in uploading eval results to GCS, supported types: CSV, JSONL.
PiperOrigin-RevId: 698873912
1 parent 97df5fc commit 905c766

File tree

2 files changed

+69
-13
lines changed

2 files changed

+69
-13
lines changed

tests/unit/vertexai/test_evaluation.py

+24
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,8 @@
6868

6969
_TEST_PROJECT = "test-project"
7070
_TEST_LOCATION = "us-central1"
71+
_TEST_BUCKET = "gs://test-bucket"
72+
_TEST_FILE_NAME = "test-file-name.csv"
7173
_AUTORATER_INSTRUCTION = """
7274
You are an expert evaluator. Your task is to evaluate the quality of the responses generated by AI models.
7375
"""
@@ -181,6 +183,12 @@
181183
text,text,text\n
182184
"""
183185
_TEST_EXPERIMENT = "test-experiment"
186+
_TEST_CSV = pd.DataFrame(
187+
columns={
188+
"response": ["text"],
189+
"reference": ["ref"],
190+
}
191+
)
184192
_EXPECTED_POINTWISE_PROMPT_TEMPLATE = """
185193
# Instruction
186194
hello
@@ -549,6 +557,16 @@ def mock_experiment_tracker():
549557
yield mock_experiment_tracker
550558

551559

560+
@pytest.fixture
def mock_storage_blob_upload_from_filename():
    """Patches GCS blob uploads and makes `Bucket.exists` report True.

    Yields the `Blob.upload_from_filename` mock so tests can assert on
    the upload call without touching real GCS.
    """
    upload_patcher = mock.patch("google.cloud.storage.Blob.upload_from_filename")
    bucket_exists_patcher = mock.patch(
        "google.cloud.storage.Bucket.exists", return_value=True
    )
    with upload_patcher as mock_blob_upload_from_filename, bucket_exists_patcher:
        yield mock_blob_upload_from_filename
568+
569+
552570
@pytest.mark.usefixtures("google_auth_mock")
553571
class TestEvaluation:
554572
def setup_method(self):
@@ -1929,3 +1947,9 @@ def test_pairtwise_metric_prompt_template_with_default_values(self):
19291947
str(pairwise_metric_prompt_template)
19301948
== _EXPECTED_PAIRWISE_PROMPT_TEMPLATE_WITH_DEFAULT_VALUES.strip()
19311949
)
1950+
1951+
def test_upload_results(self, mock_storage_blob_upload_from_filename):
    """Verifies eval results are written and uploaded to the GCS bucket.

    Bug fix: the original `assert mock.called_once_with(...)` can never
    fail -- `called_once_with` is not a Mock assertion method, so attribute
    access auto-creates a child Mock, which is always truthy. Use the real
    `assert_called_once()` so the test actually checks the upload happened.
    """
    evaluation.utils.upload_evaluation_results(
        _TEST_CSV, _TEST_BUCKET, _TEST_FILE_NAME
    )
    # The upload goes through a temp file whose path is not predictable,
    # so assert on the call itself rather than the exact filename argument.
    mock_storage_blob_upload_from_filename.assert_called_once()

vertexai/evaluation/utils.py

+45-13
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,10 @@
1818
import functools
1919
import io
2020
import os
21+
import tempfile
2122
import threading
2223
import time
23-
from typing import Any, Dict, Optional, TYPE_CHECKING, Union, Callable
24+
from typing import Any, Dict, Optional, TYPE_CHECKING, Union, Callable, Literal
2425

2526
from google.cloud import bigquery
2627
from google.cloud import storage
@@ -250,25 +251,56 @@ def _read_gcs_file_contents(filepath: str) -> str:
250251
return blob.download_as_string().decode("utf-8")
251252

252253

254+
def _upload_pandas_df_to_gcs(
255+
df: "pd.DataFrame", upload_gcs_path: str, file_type: Literal["csv", "jsonl"]
256+
) -> None:
257+
"""Uploads the provided Pandas DataFrame to a GCS bucket.
258+
259+
Args:
260+
df: The Pandas DataFrame to upload.
261+
upload_gcs_path: The GCS path to upload the data file.
262+
file_type: The file type of the data file.
263+
"""
264+
265+
with tempfile.TemporaryDirectory() as temp_dir:
266+
if file_type == "csv":
267+
local_dataset_path = os.path.join(temp_dir, "metrics_table.csv")
268+
df.to_csv(path_or_buf=local_dataset_path)
269+
elif file_type == "jsonl":
270+
local_dataset_path = os.path.join(temp_dir, "metrics_table.jsonl")
271+
df.to_json(path_or_buf=local_dataset_path, orient="records", lines=True)
272+
else:
273+
raise ValueError(
274+
f"Unsupported file type: {file_type} from {upload_gcs_path}."
275+
" Please provide a valid GCS path with `jsonl` or `csv` suffix."
276+
)
277+
278+
storage_client = storage.Client(
279+
project=initializer.global_config.project,
280+
credentials=initializer.global_config.credentials,
281+
)
282+
storage.Blob.from_string(
283+
uri=upload_gcs_path, client=storage_client
284+
).upload_from_filename(filename=local_dataset_path)
285+
286+
253287
def upload_evaluation_results(
254288
dataset: "pd.DataFrame", destination_uri_prefix: str, file_name: str
255-
):
256-
"""Uploads eval results to GCS CSV destination."""
257-
supported_file_types = ["csv"]
289+
) -> None:
290+
"""Uploads eval results to GCS destination.
291+
292+
Args:
293+
dataset: Pandas dataframe to upload.
294+
destination_uri_prefix: GCS folder to store the data.
295+
file_name: File name to store the data.
296+
"""
258297
if not destination_uri_prefix:
259298
return
260299
if destination_uri_prefix.startswith(_GCS_PREFIX):
261300
_, extension = os.path.splitext(file_name)
262301
file_type = extension.lower()[1:]
263-
if file_type in supported_file_types:
264-
output_path = destination_uri_prefix + "/" + file_name
265-
utils.gcs_utils._upload_pandas_df_to_gcs(dataset, output_path)
266-
else:
267-
raise ValueError(
268-
"Unsupported file type in the GCS destination URI:"
269-
f" {file_name}, please provide a valid GCS"
270-
f" file name with a file type in {supported_file_types}."
271-
)
302+
output_path = destination_uri_prefix + "/" + file_name
303+
_upload_pandas_df_to_gcs(dataset, output_path, file_type)
272304
else:
273305
raise ValueError(
274306
f"Unsupported destination URI: {destination_uri_prefix}."

0 commit comments

Comments
 (0)