@@ -2041,6 +2041,61 @@ def test_initialize_metric_column_mapping(self):
2041
2041
)
2042
2042
assert converted_metric_column_mapping == _EXPECTED_COLUMN_MAPPING
2043
2043
2044
def test_upload_results(self, mock_storage_blob_from_string):
    """Check the blobs requested and the JSON payload dumped on upload.

    ``json.dump`` is patched for the duration of the upload call so the
    object handed to it can be compared against the expected summary.

    Args:
        mock_storage_blob_from_string: Fixture mock on which the requested
            blob URIs are asserted.
    """
    dataset_uri = "gs://test-bucket/test-dataset.csv"

    with mock.patch("json.dump") as json_dump_spy:
        evaluation.utils.upload_evaluation_results(
            MOCK_EVAL_RESULT,
            _TEST_BUCKET,
            _TEST_FILE_NAME,
            "candidate_model",
            "baseline_model",
            dataset_uri,
            [_TEST_POINTWISE_METRIC, _TEST_PAIRWISE_METRIC],
        )

    # Both output objects must have been requested under the file-name folder.
    for blob_uri in (
        "gs://test-bucket/test-file-name/test-file-name.csv",
        "gs://test-bucket/test-file-name/summary_metrics.json",
    ):
        mock_storage_blob_from_string.assert_any_call(
            uri=blob_uri,
            client=mock.ANY,
        )

    expected_summary = {
        "summary_metrics": MOCK_EVAL_RESULT.summary_metrics,
        "candidate_model_name": "candidate_model",
        "baseline_model_name": "baseline_model",
        "dataset_uri": dataset_uri,
        "metric_descriptions": {
            "test_pointwise_metric": {
                "criteria": _CRITERIA,
                "rating_rubric": _POINTWISE_RATING_RUBRIC,
            },
            "test_pairwise_metric": {
                "criteria": _CRITERIA,
                "rating_rubric": _PAIRWISE_RATING_RUBRIC,
            },
        },
    }
    # The second positional argument is the file handle; its identity is
    # irrelevant here, hence mock.ANY.
    json_dump_spy.assert_called_once_with(expected_summary, mock.ANY)
2083
+
2084
def test_upload_results_with_default_file_name(self, mock_storage_blob_from_string):
    """Check the CSV blob URI used when no file name is passed to the upload.

    ``aiplatform_utils.timestamped_unique_name`` is patched to return a fixed
    value so the expected URI is deterministic.

    Args:
        mock_storage_blob_from_string: Fixture mock on which the requested
            blob URI is asserted.
    """
    unique_name_patch = mock.patch.object(
        aiplatform_utils, "timestamped_unique_name"
    )
    with unique_name_patch as fake_unique_name:
        fake_unique_name.return_value = "2025-02-10-12-00-00-12345"
        evaluation.utils.upload_evaluation_results(MOCK_EVAL_RESULT, _TEST_BUCKET)

    expected_csv_uri = "gs://test-bucket/eval_results_2025-02-10-12-00-00-12345/eval_results_2025-02-10-12-00-00-12345.csv"
    mock_storage_blob_from_string.assert_any_call(
        uri=expected_csv_uri,
        client=mock.ANY,
    )
2098
+
2044
2099
2045
2100
class TestPromptTemplate :
2046
2101
def test_init (self ):
@@ -2138,57 +2193,31 @@ def test_pairtwise_metric_prompt_template_with_default_values(self):
2138
2193
== _EXPECTED_PAIRWISE_PROMPT_TEMPLATE_WITH_DEFAULT_VALUES .strip ()
2139
2194
)
2140
2195
2141
def test_upload_results(self, mock_storage_blob_from_string):
    """Upload results with explicit names and verify blobs and dumped JSON.

    Args:
        mock_storage_blob_from_string: Fixture mock recording the blob URIs
            that were requested.
    """
    metrics_under_test = [_TEST_POINTWISE_METRIC, _TEST_PAIRWISE_METRIC]

    # json.dump is replaced so the payload passed to it can be inspected.
    with mock.patch("json.dump") as patched_dump:
        evaluation.utils.upload_evaluation_results(
            MOCK_EVAL_RESULT,
            _TEST_BUCKET,
            _TEST_FILE_NAME,
            "candidate_model",
            "baseline_model",
            "gs://test-bucket/test-dataset.csv",
            metrics_under_test,
        )

    csv_uri = "gs://test-bucket/test-file-name/test-file-name.csv"
    summary_uri = "gs://test-bucket/test-file-name/summary_metrics.json"
    mock_storage_blob_from_string.assert_any_call(uri=csv_uri, client=mock.ANY)
    mock_storage_blob_from_string.assert_any_call(uri=summary_uri, client=mock.ANY)

    pointwise_description = {
        "criteria": _CRITERIA,
        "rating_rubric": _POINTWISE_RATING_RUBRIC,
    }
    pairwise_description = {
        "criteria": _CRITERIA,
        "rating_rubric": _PAIRWISE_RATING_RUBRIC,
    }
    patched_dump.assert_called_once_with(
        {
            "summary_metrics": MOCK_EVAL_RESULT.summary_metrics,
            "candidate_model_name": "candidate_model",
            "baseline_model_name": "baseline_model",
            "dataset_uri": "gs://test-bucket/test-dataset.csv",
            "metric_descriptions": {
                "test_pointwise_metric": pointwise_description,
                "test_pairwise_metric": pairwise_description,
            },
        },
        mock.ANY,
    )
2180
-
2181
def test_upload_results_with_default_file_name(self, mock_storage_blob_from_string):
    """Upload with only result and bucket; verify the CSV blob URI requested.

    Args:
        mock_storage_blob_from_string: Fixture mock recording the blob URIs
            that were requested.
    """
    # Pin the generated unique name so the expected URI below is stable.
    with mock.patch.object(
        aiplatform_utils, "timestamped_unique_name"
    ) as pinned_name:
        pinned_name.return_value = "2025-02-10-12-00-00-12345"
        upload_args = (MOCK_EVAL_RESULT, _TEST_BUCKET)
        evaluation.utils.upload_evaluation_results(*upload_args)

    mock_storage_blob_from_string.assert_any_call(
        uri="gs://test-bucket/eval_results_2025-02-10-12-00-00-12345/eval_results_2025-02-10-12-00-00-12345.csv",
        client=mock.ANY,
    )
2196
def test_complex_prompt_template_variables(self):
    """Check which placeholders ``PromptTemplate`` reports for a busy template.

    The template mixes JSON-looking brace blocks with placeholders. The
    expected set contains ``var_1``, ``var2``, ``_var_3``, ``VAR_6`` and
    ``var_9`` (written as ``{{var_9}}``); the JSON blocks, the placeholder
    spread over several lines and ``{7_var}`` are expected to be absent.
    """
    expected_variables = {"var_1", "var2", "_var_3", "VAR_6", "var_9"}

    template_str = """Metric prompt template
    instructions ...
    Here are some JSON structures
    {
        "Function API spec": You may use default python libraries,
        "example": test test
    }
    Output format prompt with JSON:
    The answer should be a json alone which follows the json structure below:
    {
        "is_the_response_valid": [valid or invalid],
        "reasoning":
        "rewritten response":
    }
    Here are some actual variables:
    {var_1} {var2} {_var_3} {
        var_5_mutli_line
    } {VAR_6} {7_var} {{var_9}}
    """

    parsed_template = evaluation.PromptTemplate(template_str)
    assert parsed_template.variables == expected_variables
0 commit comments